diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..5043f75f8ea0577fe2a9c6cd581646b721ab24ad 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,843 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0190279680/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0014018560/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0005007360/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1902796800/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0050073600/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0090132480/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0017024000/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0110161920/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1402060800/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0019025920/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0500736000/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0015022080/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0008007680/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0130191360/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1802649600/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0018022400/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0070103040/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0801177600/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0901324800/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0016020480/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0080117760/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0020029440/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0013015040/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0007009280/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0120176640/model filter=lfs diff=lfs merge=lfs -text +drc33/gobfm3wm/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc33/jl6bq8ih/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_1602355200/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_1001472000/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0040058880/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1502208000/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0170250240/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0012016640/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0000998400/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0004003840/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0003000320/model filter=lfs diff=lfs merge=lfs -text +drc33/bkynosqi/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_0140206080/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0030044160/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0200294400/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_2002944000/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0100147200/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0006005760/model filter=lfs diff=lfs merge=lfs -text +drc11/3a2pv9yr/cp_0011013120/model filter=lfs diff=lfs merge=lfs -text +drc11/eue6pax7/cp_0150220800/model filter=lfs diff=lfs merge=lfs -text +drc11/nom9jda6/cp_0002001920/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0009011200/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_0160235520/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0010014720/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_1702502400/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_0600883200/model filter=lfs diff=lfs merge=lfs -text +drc11/v2fm2qze/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +drc33/q4mjldyy/cp_0180264960/model filter=lfs diff=lfs merge=lfs -text +resnet/13qckf6e/cp_0701030400/model filter=lfs diff=lfs merge=lfs -text +resnet/syb50iz7/cp_1301913600/model filter=lfs diff=lfs merge=lfs -text +resnet/zgyp3v0o/cp_0300441600/model filter=lfs diff=lfs merge=lfs -text +drc11/3i5nocf6/cp_0400588800/model filter=lfs diff=lfs merge=lfs -text +drc33/qqp0kn15/cp_0060088320/model filter=lfs diff=lfs merge=lfs -text +resnet/28n07cac/cp_1201766400/model filter=lfs diff=lfs merge=lfs -text +resnet/8ul1b23e/cp_1101619200/model filter=lfs diff=lfs merge=lfs -text diff --git a/drc11/3a2pv9yr/cp_0000998400/cfg.json b/drc11/3a2pv9yr/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8c6534394a0ae19d83779c3a980fcbca14e2e0b6 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0000998400/model b/drc11/3a2pv9yr/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..df3d4637d01f0293e726e7e4eacf3e4b12ab541e --- /dev/null +++ b/drc11/3a2pv9yr/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d5d0b7669feb44f14b218368ec57cc0063d9d89b08cdad90dba75f4771d158 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0002001920/cfg.json b/drc11/3a2pv9yr/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f2df877977b083f3bd3f226e1d2b9b08b68e2f51 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0002001920/model b/drc11/3a2pv9yr/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..2e4728bf1d8d140a96341870cae4a28c0d6f2449 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b81c36f640f3981838f4e9f3ce579fba910072bae937c0032b557bc96779db1 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0003000320/cfg.json b/drc11/3a2pv9yr/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a1a14d5c4e05c15c33884be3403563d75c464e90 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 586, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0003000320/model b/drc11/3a2pv9yr/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..e1b833e8790c6bbc2e81502541b63f73acff922f --- /dev/null +++ b/drc11/3a2pv9yr/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0dc3b3ca5a4ca38ae68e9807e31ef305d1713d835c9dccb6beb3f4051b89758 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0004003840/cfg.json b/drc11/3a2pv9yr/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6ab995c7596a090b3db8808aa56e59aedf46235a --- /dev/null +++ b/drc11/3a2pv9yr/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 782, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0004003840/model b/drc11/3a2pv9yr/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..501d124d5c25e97fc7506d36046e561bd5b84c88 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae9c6ec63fe4e5d5b40c787bfd11b575a127302fbed1fe57264e58bf802c01d +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0005007360/cfg.json b/drc11/3a2pv9yr/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c2ab70aa47b4e17bd91522f295bd2c777d58c8e --- /dev/null +++ b/drc11/3a2pv9yr/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 978, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0005007360/model b/drc11/3a2pv9yr/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..b0a3abbc1edfaec7ccc506be84436478dafa36c0 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40dc8fa65e31c2ea6a98f2d8d220a8ea0dbe577775ac42921a9f1eee980301bb +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0006005760/cfg.json b/drc11/3a2pv9yr/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d20715da9cddeec5e1b16e473d7ca41a83c89047 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1173, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0006005760/model b/drc11/3a2pv9yr/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..d417ff1f0ac9ea058ec26c3c9c5c9b9162e410aa --- /dev/null +++ b/drc11/3a2pv9yr/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f84167da696add5d70013a1a9625ff394d995537500e7210e398a4da2748f659 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0007009280/cfg.json b/drc11/3a2pv9yr/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..632db47f494c747358668341bf96481e18d38be7 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1369, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0007009280/model b/drc11/3a2pv9yr/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..c079f7fea88d1b5fa9a6591c976a6d993e6ec7e0 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f727d792bcae61a221ddb11e252056ec1ee0a9d432a440833983636c23370f4 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0008007680/cfg.json b/drc11/3a2pv9yr/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2badcae4da1c5083360672e7c330b1b0537c2ce9 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1564, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0008007680/model b/drc11/3a2pv9yr/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..48f469d1e0ac39027e95fc4d6870639585c66b53 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b1c319a14973bfd4068cff5f96fd1b06b0dc08edc24667fc9606b799f303e6b +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0009011200/cfg.json b/drc11/3a2pv9yr/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2be05a1d5adbbd1c77da61ab89c077b9538c49c8 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1760, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0009011200/model b/drc11/3a2pv9yr/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..1d094ee2cd5c1849820211d10ed97a54d45e8951 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c132ff12e2ebff6c4b3197c887ce33d1e64ed201eef3c1f2bde41a3c844417 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0010014720/cfg.json b/drc11/3a2pv9yr/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..831c5b2429099707efc470a0d7c08a62375024f3 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1956, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0010014720/model b/drc11/3a2pv9yr/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..f0dc4ab1cf82fa0da715d862e06a408544143ab0 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d0189e795a0e548183cdf548de961ca97d3feb5d3fc2218fa8cb07d4969643b +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0011013120/cfg.json b/drc11/3a2pv9yr/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6a22c349f110f6204152ff209cb18b346f32a870 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2151, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0011013120/model b/drc11/3a2pv9yr/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..2983ec525fb982de6052b87c37cda9112acde7cf --- /dev/null +++ b/drc11/3a2pv9yr/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8847811ab9a593bfba9c88f2c380f69294177545c9fc44c2e6cfe174b3236b10 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0012016640/cfg.json b/drc11/3a2pv9yr/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b955ecb86be6ab0b60190bd676bf4ed413b46b8d --- /dev/null +++ b/drc11/3a2pv9yr/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2347, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0012016640/model b/drc11/3a2pv9yr/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..54f0d3650e5e5d565319f82e40612e832722f6ec --- /dev/null +++ b/drc11/3a2pv9yr/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb44525aa96168b6e97bab4553cb372b0dc370017dee602d497fb2347938af1f +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0013015040/cfg.json b/drc11/3a2pv9yr/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b9d1fb1a77886c8d95c4f9f99aa4218f0996f797 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2542, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0013015040/model b/drc11/3a2pv9yr/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..a6e5a314020ef44c9eb1fffbd96a3d591e16873f --- /dev/null +++ b/drc11/3a2pv9yr/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6663153cd84febe6cc9add823c12bd3f38d82376ae3fa3dc8a818a1a9bd19c +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0014018560/cfg.json b/drc11/3a2pv9yr/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1e5e401bc59bd77c9fffa6d1892ffa2ce88a4d87 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2738, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0014018560/model b/drc11/3a2pv9yr/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..effe8ad07745f7fea3a1bce1394e19e8a3b4ca21 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9205324e5e4bc8c3bde9b9925c3330f435121d4667ae6c5532d7de933360a6bc +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0015022080/cfg.json b/drc11/3a2pv9yr/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d98b49e65a137a18d4c297b7fac081b292a38412 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2934, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0015022080/model b/drc11/3a2pv9yr/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..0959cb773610391f619e0117fef7adf4312632e9 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc214c25730e4fcbc699d23adb297203567d90d0db6afa920c5572eef1d2ac3a +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0016020480/cfg.json b/drc11/3a2pv9yr/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..04cbc825bf6a14024cdd95ac13bbc86eb8ec1fea --- /dev/null +++ b/drc11/3a2pv9yr/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3129, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0016020480/model b/drc11/3a2pv9yr/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..a189e004a8ee34e841e97b104e9ac247a6e33943 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de32cef310ed1fce820078f0573d129b869cc160713355982eb5f49c28a25646 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0017024000/cfg.json b/drc11/3a2pv9yr/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e4a65bcfbbca43e5a997587a5c1cf946e110a72b --- /dev/null +++ b/drc11/3a2pv9yr/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3325, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0017024000/model b/drc11/3a2pv9yr/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..b338e40182f15f957670f666f3e91098e0ccb1ad --- /dev/null +++ b/drc11/3a2pv9yr/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:410282849c42fa499792afbda93980a2c24669cdef40cf12929030a507c2765c +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0018022400/cfg.json b/drc11/3a2pv9yr/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6b1a736d3b27761931e5b12dc405cc9b13653a2c --- /dev/null +++ b/drc11/3a2pv9yr/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3520, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0018022400/model b/drc11/3a2pv9yr/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..472a76e41245bb7bcab07bbed7ce8f18cbb79ee9 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1334a1c97167d5903b87553037d0053c186139e1349131bdfd866691d57693d4 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0019025920/cfg.json b/drc11/3a2pv9yr/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..240a50441eaa2e26dcaf8e26f3c92ca36163ab87 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3716, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0019025920/model b/drc11/3a2pv9yr/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..9ceec5e47dc8e0c75f9c6b9dbf6dbde8ff59ebc3 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f86d5a4af6ce826da650d819d42355a01cb77057b438ce4d3faa147dee1df1 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0020029440/cfg.json b/drc11/3a2pv9yr/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3722cbfde181556f89acb54abed528ef048e78fc --- /dev/null +++ b/drc11/3a2pv9yr/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3912, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0020029440/model b/drc11/3a2pv9yr/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..eac7c711aa57c697d96c8489ec57e95a62551b26 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc3b8f31fff56b56637e337ae352a25e819d53925af62a4ab3b20d32295a1e72 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0030044160/cfg.json b/drc11/3a2pv9yr/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c1fef25d445920c524e24b2049095a1e859b12bb --- /dev/null +++ b/drc11/3a2pv9yr/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 5868, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0030044160/model b/drc11/3a2pv9yr/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..3fe4051c94992e1c1ff29a6bb7730b92003503d9 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8ecf6aab02f0e45bb7db1b7edd24eb64e70a5d8db00eaa34e1ee76e8b0bbb8d +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0040058880/cfg.json b/drc11/3a2pv9yr/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a1647504fe4fc0c42aaabef5dda69b20b75868f5 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 7824, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0040058880/model b/drc11/3a2pv9yr/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..4df3bde3be5b73fb900e1702adb43d364722bffb --- /dev/null +++ b/drc11/3a2pv9yr/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0f6551f8cab4240fcbb6ecd3d3d5b96ef6b939d939a0ee6f596df649ad1d53f +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0050073600/cfg.json b/drc11/3a2pv9yr/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..96a664d4efaa7990b65b536dce2ff9e3ab3c4966 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 9780, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0050073600/model b/drc11/3a2pv9yr/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..eff0e26afe5779f49eaf261359489601c2f6e27f --- /dev/null +++ b/drc11/3a2pv9yr/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b1ad227eb81a552d4f0e50f62797c4181287ce4bec42c8f56001b1024895e81 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0060088320/cfg.json b/drc11/3a2pv9yr/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..91be77511e3249389e950ccab881dbb1a55acae0 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 11736, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0060088320/model b/drc11/3a2pv9yr/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..3270908cfe9b87fb509d92c4eabde4de10d6fc02 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e88143e79abf62d37cd3dd9855240818ca2ed2b9009fcd78902f452606946240 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0070103040/cfg.json b/drc11/3a2pv9yr/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2f781631f1745cefc934ace5cb9ac25f24a440c0 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 13692, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0070103040/model b/drc11/3a2pv9yr/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..baab4a74848825e4cc076122edbfccf6c77e1df9 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf3110bb3c0fc63d63005a25af5fbb38f3232f3ce06241aa22e44fdecac8fbb +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0080117760/cfg.json b/drc11/3a2pv9yr/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19f86aa2e056609b4c6b1e664922b72b731f61bb --- /dev/null +++ b/drc11/3a2pv9yr/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 15648, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0080117760/model b/drc11/3a2pv9yr/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..5c90fd96e0e1235ce707f1f399c5cad010a73f8c --- /dev/null +++ b/drc11/3a2pv9yr/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c73d29bb85f61f154ba81e5037b0ab694bd7ae6ae976a12b1627de28180811b +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0090132480/cfg.json b/drc11/3a2pv9yr/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a24db9d08ab86338e9754c90945167d968bc7fd5 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 17604, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0090132480/model b/drc11/3a2pv9yr/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..3419f0088e51358fa0c41e12e587170c3dde5437 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2697425efeb4c0f3e7657ceb462a509243475139da0ff6acf80cf1132de268 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0100147200/cfg.json b/drc11/3a2pv9yr/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6b742a44729222c012f55b9dce88c4745c311fb0 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 19560, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0100147200/model b/drc11/3a2pv9yr/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..e1c4b6dc35d2b6f3c488d3063dd3a8c00449589c --- /dev/null +++ b/drc11/3a2pv9yr/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0a334b723ad919fdcdde81f5f218ff1971af6961d0724d0f943adc2446b9a8f +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0110161920/cfg.json b/drc11/3a2pv9yr/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..aa241026ff2ddc98c183aff3d0e8937ee7af986c --- /dev/null +++ b/drc11/3a2pv9yr/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 21516, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0110161920/model b/drc11/3a2pv9yr/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..36594566210684f3d40eea798d682dee172ca520 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6712cfcddfbb42680f511e82bfd2b400475f4500b2a61c6ac23e30e28c5cb0 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0120176640/cfg.json b/drc11/3a2pv9yr/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..01b772c17c001447d1d22fc2376da5ec74f5e9d0 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 23472, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0120176640/model b/drc11/3a2pv9yr/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..d0f4d5b4820421ff40a3fd16b1e1640993e42a3c --- /dev/null +++ b/drc11/3a2pv9yr/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f3311d13a610e6b977dd69495866647c94709ccba32c120ab9d651bc255ef6d +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0130191360/cfg.json b/drc11/3a2pv9yr/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..49f27bfba5e4d0c01257247da4134305d6505b63 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 25428, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0130191360/model b/drc11/3a2pv9yr/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..f80ef3d5cce49edb2c7e42bc1731aab2a36318db --- /dev/null +++ b/drc11/3a2pv9yr/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4367ccb21f799a0eea38f9457a48aa8cfc8a36b2c161c350fb5a2daefc752423 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0140206080/cfg.json b/drc11/3a2pv9yr/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7a7e4a28abaa614728532f4100733b604a47306d --- /dev/null +++ b/drc11/3a2pv9yr/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 27384, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0140206080/model b/drc11/3a2pv9yr/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..11354c0a3a115174d921b7ba906cbd93071f6a27 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5154c19b24c421186e19af64ecbf2f74cce8d527038e48e7ffe06180ea0aa00 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0150220800/cfg.json b/drc11/3a2pv9yr/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a512f9983ad6271df960ea397364d0f44a091c14 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 29340, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0150220800/model b/drc11/3a2pv9yr/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..13d9a5130eff9ec9bc77e70d0c6b7e6b18c74612 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08cf11488dba6f0f5742151e532307877d1ef9ab502af124490d1852e119fa5e +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0160235520/cfg.json b/drc11/3a2pv9yr/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8cf9a33fc9608ee63ecc2cd19deba144602e27c5 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 31296, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0160235520/model b/drc11/3a2pv9yr/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..eaefe58c4a38b29604a42f77568132d866c9910a --- /dev/null +++ b/drc11/3a2pv9yr/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b20371877e6e1b7b5fec27e0a671fc0b6dee8e63eefaf6f53c7146b82c9067 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0170250240/cfg.json b/drc11/3a2pv9yr/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7f1b8f7f74a3307f66f064759fec25952e9c3939 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 33252, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0170250240/model b/drc11/3a2pv9yr/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..b158898d560e3eff2a6b6bd4197609478359949f --- /dev/null +++ b/drc11/3a2pv9yr/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64eef39a69e779b63cac4268be7e70cdc5bb5b2a2afc7cafb08da6aec66acfee +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0180264960/cfg.json b/drc11/3a2pv9yr/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..15aff7cd1342bf3514424814a370fefc461e561e --- /dev/null +++ b/drc11/3a2pv9yr/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 35208, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0180264960/model b/drc11/3a2pv9yr/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..091491ea8591cda163abc061c4b7ea3afa01cf8e --- /dev/null +++ b/drc11/3a2pv9yr/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5933c4b2a0d43da722d3c77f7fc7e9075169ca16ef67070992686e2ac8a53eed +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0190279680/cfg.json b/drc11/3a2pv9yr/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..97978984e705b241661ed106d0a5368bdf4b7139 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 37164, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0190279680/model b/drc11/3a2pv9yr/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..adf3e5af13b1472258d1130c5287c2ff3615e67a --- /dev/null +++ b/drc11/3a2pv9yr/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e39feb1fab96e618b766be933b5ac1d16d4928c5ca1969e123c63caed1a5a0e6 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0200294400/cfg.json b/drc11/3a2pv9yr/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..64a1d935f27fda350b90fb959fadbbcc28cf88aa --- /dev/null +++ b/drc11/3a2pv9yr/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 39120, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0200294400/model b/drc11/3a2pv9yr/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..7e71d5c6d6770942c1ab54891507f14e6d69bee6 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1225625cbb01ff924de692018ae1635f890df193224d93ed453fc8141d7c5377 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0300441600/cfg.json b/drc11/3a2pv9yr/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3e1072c446847918e48f09c99d11cb890aff9abd --- /dev/null +++ b/drc11/3a2pv9yr/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 58680, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0300441600/model b/drc11/3a2pv9yr/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..13bc63a872dbc7d28a87f75d2804a6f5971b1df3 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:017d3d9cee2bed07ce0d32dee5aec9817cff4d9f2f90d138050ac2a11a643bac +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0400588800/cfg.json b/drc11/3a2pv9yr/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a20c4386c6a33aef624339f7ffb658feaff20d98 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 78240, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0400588800/model b/drc11/3a2pv9yr/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..dba3ad932e61e4b6407a56dbc39a58a8d0cf7a0b --- /dev/null +++ b/drc11/3a2pv9yr/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c34ee3a977483342cb350176540695822ed93271606f02f876871f20bb61544 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0500736000/cfg.json b/drc11/3a2pv9yr/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a53702354d7be0dc5468d26e9ed844ab9abb57fc --- /dev/null +++ b/drc11/3a2pv9yr/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 97800, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0500736000/model b/drc11/3a2pv9yr/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..2592baac045452be532eb4bb624aa7c911d48f28 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abaca9f37a17ff04b2d23c176f437b7b031d371d3c7056dc5374719bbcfde866 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0600883200/cfg.json b/drc11/3a2pv9yr/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ae71c753f462dcaa346e5fca436dc2a8fcb46267 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 117360, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0600883200/model b/drc11/3a2pv9yr/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..a1ee6eb30de96cda2a6ac6ce50a473a3b3a2a33a --- /dev/null +++ b/drc11/3a2pv9yr/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a57e1576cc5661c31deaa130da37bd8e0ec87bab7f5888ebf908252feca7538 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0701030400/cfg.json b/drc11/3a2pv9yr/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..50e880746a39e900427e0097c8f68cd3ceb971f9 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 136920, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0701030400/model b/drc11/3a2pv9yr/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..73a293e3c5dfa9faf1f965254e5592b1b5d6c1a3 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31f26a5558d43a683cef931dac772c432c5422a57668660ff9abd789133709c +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0801177600/cfg.json b/drc11/3a2pv9yr/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ad1f7ced4fab98813ad4b1e66f433c5a488eed9e --- /dev/null +++ b/drc11/3a2pv9yr/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 156480, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0801177600/model b/drc11/3a2pv9yr/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..2a08569cacb980319e93018c116e9ca767f76f98 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22879021b35ed43e41fe21b33a442a0128eb71449780c35f36c17fac04065f31 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_0901324800/cfg.json b/drc11/3a2pv9yr/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..14a2fdd117ee3a356a15459642248f71bd4a9aa1 --- /dev/null +++ b/drc11/3a2pv9yr/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 176040, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_0901324800/model b/drc11/3a2pv9yr/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..3fad385c00c8efab8deecb00189eab2adf96c7bf --- /dev/null +++ b/drc11/3a2pv9yr/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7505aff5bbd8deaae4b8ce9c99634da82831eafa77117b7cee2beefa5bb2f40f +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1001472000/cfg.json b/drc11/3a2pv9yr/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3dfe7a5ee887837173ff1133bb81066c2db14861 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195600, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1001472000/model b/drc11/3a2pv9yr/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..0756cb9820b3682b4ec1e8aeb17de50dc32d8d8a --- /dev/null +++ b/drc11/3a2pv9yr/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faa705c9f1a98e2c7cf40564d1d626bd914ef2e5dcc98f313cee73e014ac97e7 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1101619200/cfg.json b/drc11/3a2pv9yr/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0a45aaa6116af86afbe86b157eeb9003476ce43c --- /dev/null +++ b/drc11/3a2pv9yr/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 215160, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1101619200/model b/drc11/3a2pv9yr/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..368a1c8af74cbea662d5bb078294ebb6ed1c6ef4 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c810b64c75ac24d9338724e78919353a3a0b4d0f7fa59e47a453344a1110770c +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1201766400/cfg.json b/drc11/3a2pv9yr/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..859c7c82b5526ab9c1e4f17382dd8a20301832f4 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 234720, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1201766400/model b/drc11/3a2pv9yr/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..687ef511efc16f83da640c28414674d90990fa52 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:231ff4bd29d131a2efce711f0b68caff0557b77d17ea2118f182b1b92e9f333b +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1301913600/cfg.json b/drc11/3a2pv9yr/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7729175c388f61e22a4d7ca8a9307442da0d351d --- /dev/null +++ b/drc11/3a2pv9yr/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 254280, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1301913600/model b/drc11/3a2pv9yr/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..1b2a6fe4cfe6d6adae5024b8443f57d41066ac02 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7933923640f57c34b91f9919f18ff23e3b24dbbe6c76b6cc63d64d4db9de57bd +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1402060800/cfg.json b/drc11/3a2pv9yr/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4dd7dd0fcb4130440ec6d2a4f8f4a1502dc38c58 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 273840, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1402060800/model b/drc11/3a2pv9yr/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..38360d1db9503ec2207a683867edde6df1ae5490 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9667ad8c9db87607acffbba7fb992634e91641a80e4d41196d93612571451b7 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1502208000/cfg.json b/drc11/3a2pv9yr/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..89a452a974b10b0d718c85c9c83469050548c513 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 293400, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1502208000/model b/drc11/3a2pv9yr/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..3fe20b2cf7b13895a81a3d256ceb349a564ff507 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2066615f8a33521cf4af1668fc882bdf2d76fc8bfebe679915233599abb0adf +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1602355200/cfg.json b/drc11/3a2pv9yr/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2959c2a594c0ffc493403456f8f689336afacc6d --- /dev/null +++ b/drc11/3a2pv9yr/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 312960, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1602355200/model b/drc11/3a2pv9yr/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..b29aeb5e64b4ec3e76e9116acb8da1e07cb52e41 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e50f0e335b15a287d7fb1eb1b122187443138559aa0c916c29a6e9f1e879b92 +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1702502400/cfg.json b/drc11/3a2pv9yr/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1183159f30f8a64384dc894becef9373556b435c --- /dev/null +++ b/drc11/3a2pv9yr/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 332520, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1702502400/model b/drc11/3a2pv9yr/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..97b050fc0c1f578841baf09ede63ce9d69c036c8 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a0354ddfee214130346be56f2e920fe685e23b0ee7c1f38c147c35ecbdc20a +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1802649600/cfg.json b/drc11/3a2pv9yr/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c588682de8f8e74754f3c9a82c0378406a50cbf1 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 352080, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1802649600/model b/drc11/3a2pv9yr/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..c1bd6563fccde2a5c5b8f6d832036e093be84eef --- /dev/null +++ b/drc11/3a2pv9yr/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93ba77c60d3f03c0de06fc4b696b8330b2c79e21f93c871719bc73da033ff18f +size 15803057 diff --git a/drc11/3a2pv9yr/cp_1902796800/cfg.json b/drc11/3a2pv9yr/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b18df4af5942b5fe6275cfa64558c2a77f443927 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 371640, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_1902796800/model b/drc11/3a2pv9yr/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..da7a2a156b45d85116f8e631175a535cf10bf8e5 --- /dev/null +++ b/drc11/3a2pv9yr/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b495c0d177169dffaf3df7432c18eda19a5f0b7168f92cc3b6041e481757635f +size 15803057 diff --git a/drc11/3a2pv9yr/cp_2002944000/cfg.json b/drc11/3a2pv9yr/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4a1254035d3d18a9136ba0cbca8a95dfd3aa2f0d --- /dev/null +++ b/drc11/3a2pv9yr/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413569759}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 921240238, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391200, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc11/3a2pv9yr/cp_2002944000/model b/drc11/3a2pv9yr/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..272f4bdbacd1e1b4044c5147529a8b8d63345834 --- /dev/null +++ b/drc11/3a2pv9yr/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2897843e573e727c92f415eea5a68bf90ce85c80597d18b0f02098f407d56f15 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0000998400/cfg.json b/drc11/3i5nocf6/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0f9fedea992e1a3c1c6d2f3d9acc29a5938da4a0 --- /dev/null +++ b/drc11/3i5nocf6/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0000998400/model b/drc11/3i5nocf6/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..8f15441d246e25b831abba74bdaa36f310012bd4 --- /dev/null +++ b/drc11/3i5nocf6/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6595635bce5365d3021614d1dd2d7b3a92454f3b2b0f1189499170450cbbf9d6 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0002001920/cfg.json b/drc11/3i5nocf6/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dfc65e229db10fd92267f8204d0e635403afa731 --- /dev/null +++ b/drc11/3i5nocf6/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0002001920/model b/drc11/3i5nocf6/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..785be29abc1f2328d94f2d69bcdee571252dbbd7 --- /dev/null +++ b/drc11/3i5nocf6/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:531497c5277ec7542e2bb9a2bae7c49786038685ee6d63d757f01a4e18da989e +size 15803057 diff --git a/drc11/3i5nocf6/cp_0003000320/cfg.json b/drc11/3i5nocf6/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..398759edf36d63b6a3d74591872b7bbfafb92945 --- /dev/null +++ b/drc11/3i5nocf6/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 586, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0003000320/model b/drc11/3i5nocf6/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..fc61da2a28bd1a01b00fd1b4cc666d0fe7d7e442 --- /dev/null +++ b/drc11/3i5nocf6/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00c6da6904807e4b64d427a7d295e5e78abe88e374986f18769699f320c63fc +size 15803057 diff --git a/drc11/3i5nocf6/cp_0004003840/cfg.json b/drc11/3i5nocf6/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..85e87d2c4e918b14b1abdadd59ebc1fb0b936c3f --- /dev/null +++ b/drc11/3i5nocf6/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 782, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0004003840/model b/drc11/3i5nocf6/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..99dca335b20ef4a6e19cf13da0482cf183cb6907 --- /dev/null +++ b/drc11/3i5nocf6/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b75f3eeccc27b4ff48fd15d45a1b97eec6170a37830782c1f9c0d85347b8d63a +size 15803057 diff --git a/drc11/3i5nocf6/cp_0005007360/cfg.json b/drc11/3i5nocf6/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d6bd0f6e524c9c58b201769d8d277a0112e3fca8 --- /dev/null +++ b/drc11/3i5nocf6/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 978, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0005007360/model b/drc11/3i5nocf6/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..062f6c3fe23a8fe5551363e12c6a62398fd967b8 --- /dev/null +++ b/drc11/3i5nocf6/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7116ed6c6465cb0af3bbded5d69fc8711b8dd1525e79996c7786fb8c096f64d6 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0006005760/cfg.json b/drc11/3i5nocf6/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8f006c2d754d532f727b833ac931d080bd5f4517 --- /dev/null +++ b/drc11/3i5nocf6/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1173, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0006005760/model b/drc11/3i5nocf6/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..be08ae501bfd0bc51f0138767568642470b8c96f --- /dev/null +++ b/drc11/3i5nocf6/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc5d64d362db42aa8a579122a890553e8430b5e0b2ea14db4e34f4428e1bb49 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0007009280/cfg.json b/drc11/3i5nocf6/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..366ca83d1c078372300f8360ec3f5a36d9e4de36 --- /dev/null +++ b/drc11/3i5nocf6/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1369, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0007009280/model b/drc11/3i5nocf6/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..b75a6b86605708611b4920bfc7a3f55d3e320cee --- /dev/null +++ b/drc11/3i5nocf6/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:828bff0ffda6edbf015eed7f1e8cde51f138af30771495363d4177e9b44e9fb3 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0008007680/cfg.json b/drc11/3i5nocf6/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..42366fa547401bdf2a4c8141f2bec9e1f3fb009c --- /dev/null +++ b/drc11/3i5nocf6/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1564, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0008007680/model b/drc11/3i5nocf6/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..635280ad77e004b44d80059b1339903503c3db54 --- /dev/null +++ b/drc11/3i5nocf6/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c109af68389dc322cae64298be3331a84867dbd8ef62d50f9c03ecdebb1d85cb +size 15803057 diff --git a/drc11/3i5nocf6/cp_0009011200/cfg.json b/drc11/3i5nocf6/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d2157c3e6dd716bc28f23b11910421f782235afc --- /dev/null +++ b/drc11/3i5nocf6/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1760, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0009011200/model b/drc11/3i5nocf6/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..d7d24b35dcfd6fee71ce2617d10141a4582a1c0d --- /dev/null +++ b/drc11/3i5nocf6/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2e20a131ab5d36ea7e2ec0e1ec0e13584bb7d108ac208d76cfd60501d9da16 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0010014720/cfg.json b/drc11/3i5nocf6/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..37609b2a1a67df41fd5339261a6bd4dcab9b20ab --- /dev/null +++ b/drc11/3i5nocf6/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1956, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0010014720/model b/drc11/3i5nocf6/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..f552f0b0b3e80ffe683376152f3cc69f453f46b1 --- /dev/null +++ b/drc11/3i5nocf6/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfebad815a5f071b2610625bae3892fdeb07b3d984e09e37ea6471b2180dc64c +size 15803057 diff --git a/drc11/3i5nocf6/cp_0011013120/cfg.json b/drc11/3i5nocf6/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..54b8199d926c5e8633243e3ef92e97a6c47012ec --- /dev/null +++ b/drc11/3i5nocf6/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2151, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0011013120/model b/drc11/3i5nocf6/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..44514e9f47a1b821e1f898e258b84a75abc254c9 --- /dev/null +++ b/drc11/3i5nocf6/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c455440b5dec88a71299c0fa6de5ce6b2fcd91753bc7315bbc020006b92044 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0012016640/cfg.json b/drc11/3i5nocf6/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6cc91f2ebc97c93b65c4165e83a1cafa5d82b53c --- /dev/null +++ b/drc11/3i5nocf6/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2347, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0012016640/model b/drc11/3i5nocf6/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..8b34e823762bc08894734ee309c2ed3c26292b57 --- /dev/null +++ b/drc11/3i5nocf6/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a6e2cde5825cb4bc5b99c57ebd4417f54e0c818a52a6f79b7222bb654b63ec4 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0013015040/cfg.json b/drc11/3i5nocf6/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ec165fb927e98159fbbbd99573bfc24ecf208248 --- /dev/null +++ b/drc11/3i5nocf6/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2542, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0013015040/model b/drc11/3i5nocf6/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..c99b37a5d42a52fc1c89cf16a162ef8ce597c8eb --- /dev/null +++ b/drc11/3i5nocf6/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d9dbe7599d5845d9384e0a6a837a7b45c1a508d6627884ee28cf535b1663468 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0014018560/cfg.json b/drc11/3i5nocf6/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..95b72c0ec2434b4cf0152bf255185b562a384de7 --- /dev/null +++ b/drc11/3i5nocf6/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2738, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0014018560/model b/drc11/3i5nocf6/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..8c94b791543341633cbd0a17764548203ce75cd6 --- /dev/null +++ b/drc11/3i5nocf6/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9af2932cd27049f120568ce3df7bbf35814c2489cee38e8eff1f53cb97b829a3 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0015022080/cfg.json b/drc11/3i5nocf6/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9fccf2734d7e61905cc01e79b92d169d57520bdc --- /dev/null +++ b/drc11/3i5nocf6/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2934, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0015022080/model b/drc11/3i5nocf6/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..d661d45b1a358f48555884ecc01144cd6edebf4d --- /dev/null +++ b/drc11/3i5nocf6/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe64de0a3885988fa60c4be560e2b7eda3761dda0fff55979c01fd965ac057a +size 15803057 diff --git a/drc11/3i5nocf6/cp_0016020480/cfg.json b/drc11/3i5nocf6/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2afa17ae667e5ee4d4dd146376f53537b92d7376 --- /dev/null +++ b/drc11/3i5nocf6/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3129, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0016020480/model b/drc11/3i5nocf6/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..d21453062a923997dbc5b66bb4882033e0e51730 --- /dev/null +++ b/drc11/3i5nocf6/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cdf579a825dc721679b7e59332d73012ab7d16a311f9cb9ebad541c735ec60e +size 15803057 diff --git a/drc11/3i5nocf6/cp_0017024000/cfg.json b/drc11/3i5nocf6/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..91e2feb4f0f4789ac523049eb4c2ccdea5ef048e --- /dev/null +++ b/drc11/3i5nocf6/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3325, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0017024000/model b/drc11/3i5nocf6/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..31b0cbf8ecabe81fb804b75027b93aec27ff172f --- /dev/null +++ b/drc11/3i5nocf6/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:823cec17f120c170bec2af7645a449dbf20782cbeee08aef56da6744bf0551d5 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0018022400/cfg.json b/drc11/3i5nocf6/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..014c3fb6ec833650ffb35ee34ab6a4bdbf14f1b7 --- /dev/null +++ b/drc11/3i5nocf6/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3520, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0018022400/model b/drc11/3i5nocf6/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..5fbf4f5c0475e268b05784c0b1ec77abe7379fe6 --- /dev/null +++ b/drc11/3i5nocf6/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8166d404757d966337848db792a2fd053af5e5fa0b55735ee8c8a9150adafd01 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0019025920/cfg.json b/drc11/3i5nocf6/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e4369d2a50c22438f5e2aaa17d782ebd5f6d2ccd --- /dev/null +++ b/drc11/3i5nocf6/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3716, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0019025920/model b/drc11/3i5nocf6/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..a22bc8e75668e2847d7b242f611ba03c4672fc1f --- /dev/null +++ b/drc11/3i5nocf6/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e78c78ef8abb07bae4087e2095605392b67ab8787e630fad6432c3a5668cc5 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0020029440/cfg.json b/drc11/3i5nocf6/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f57b2db8a4a6a09080bda26bd9fff25a4d87970a --- /dev/null +++ b/drc11/3i5nocf6/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3912, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0020029440/model b/drc11/3i5nocf6/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..2f9d192b7fcec8fa784a9bc38b74f111b5c89cbb --- /dev/null +++ b/drc11/3i5nocf6/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cc5be8a94e511e038e031e4d7926776f223c7ab6134ee02cab5f986d86b7ec3 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0030044160/cfg.json b/drc11/3i5nocf6/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d6629822be98de85d32fd84a30cb98fbb2af24b0 --- /dev/null +++ b/drc11/3i5nocf6/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 5868, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0030044160/model b/drc11/3i5nocf6/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..f05f5699f79b808f43e94bd6aee8c9289264d1ab --- /dev/null +++ b/drc11/3i5nocf6/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f4ca31429f8644fc7f3a0c7e194c17979d4abb8f738a4fa8e8fe6ae1293e74a +size 15803057 diff --git a/drc11/3i5nocf6/cp_0040058880/cfg.json b/drc11/3i5nocf6/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..752e56b862d2b86e024f36d136aef9d95c7d02ec --- /dev/null +++ b/drc11/3i5nocf6/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 7824, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0040058880/model b/drc11/3i5nocf6/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..b1e076cdc32fa3c590545b7d99e3ba6751390608 --- /dev/null +++ b/drc11/3i5nocf6/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2364cb7be2f4063078ce885a71fbadfcb07c7a80e0eddd670ce09c2ec696fd72 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0050073600/cfg.json b/drc11/3i5nocf6/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cd397c9e2cf064ed999cb4d078e2da7ade665f88 --- /dev/null +++ b/drc11/3i5nocf6/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 9780, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0050073600/model b/drc11/3i5nocf6/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..041af90c40856c56f32b5ebe025f6e0dd7b38538 --- /dev/null +++ b/drc11/3i5nocf6/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6a88cefb9fa43dc1e7bd9d78d6114db12ad8c180fbc13c53ca6e3b6e71433e +size 15803057 diff --git a/drc11/3i5nocf6/cp_0060088320/cfg.json b/drc11/3i5nocf6/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..59885af1057fc6e062a96e312bcff7ff85dec6b0 --- /dev/null +++ b/drc11/3i5nocf6/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 11736, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0060088320/model b/drc11/3i5nocf6/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..58ee0f47c40a2aeec7d1b73bd1b8adf7ad9290aa --- /dev/null +++ b/drc11/3i5nocf6/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da07b96b6e5cf76c60b5485b0bbf53b8b8993a7939626c55742366561e6c2289 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0070103040/cfg.json b/drc11/3i5nocf6/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..961a4e7de046a5d9a50fbcd23c67aca9909494d7 --- /dev/null +++ b/drc11/3i5nocf6/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 13692, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0070103040/model b/drc11/3i5nocf6/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..7a9d7aad403eb20247f880bdc58c560cf5530ea3 --- /dev/null +++ b/drc11/3i5nocf6/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9ff576a8bfa73e6f6d32ab55c37c8442be9b5ac57af7974a792a4ead824a01 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0080117760/cfg.json b/drc11/3i5nocf6/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0145ed7c41fde5da0c3d7f1e1636b4a0913e0123 --- /dev/null +++ b/drc11/3i5nocf6/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 15648, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0080117760/model b/drc11/3i5nocf6/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..62ba13d1c7ca17f23cdc2e7381111951e26f0863 --- /dev/null +++ b/drc11/3i5nocf6/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1ab50bf700ab8b456f74d23d92a9d58a5f9a883dd4b1d9fb9f5a837ebd75ca +size 15803057 diff --git a/drc11/3i5nocf6/cp_0090132480/cfg.json b/drc11/3i5nocf6/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f292bacde2b49985a312af7eb3d69ea0ee114657 --- /dev/null +++ b/drc11/3i5nocf6/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 17604, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0090132480/model b/drc11/3i5nocf6/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..fe280ed372cc6975f30f2737fe0ff66d8974fb87 --- /dev/null +++ b/drc11/3i5nocf6/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f93ad578993bccb3a4ab3c7a62dd769442fbb7f83f1ef49baaeab978be5da99 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0100147200/cfg.json b/drc11/3i5nocf6/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d65c35d6ad27070452aece1f9b9ebbdcd1c7446f --- /dev/null +++ b/drc11/3i5nocf6/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 19560, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0100147200/model b/drc11/3i5nocf6/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..2e8b99f34d244affa6b8273a48cfe6bf08ea5998 --- /dev/null +++ b/drc11/3i5nocf6/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e0c93e91bd33d1a670eb4288485764883855d0555a1e8c347fda4cc2532ae0a +size 15803057 diff --git a/drc11/3i5nocf6/cp_0110161920/cfg.json b/drc11/3i5nocf6/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f5c8295018464e140a9f26adee72d808d356d5af --- /dev/null +++ b/drc11/3i5nocf6/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 21516, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0110161920/model b/drc11/3i5nocf6/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..79dcb16e361a4914115a731f7ae2b7a83ddb124c --- /dev/null +++ b/drc11/3i5nocf6/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6e518f3460ffd14fd9cb4b55a77f0ab243824f6eea46f87e8ad6bb880e86e7 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0120176640/cfg.json b/drc11/3i5nocf6/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..20cd0fd00fb17715153db43b1dd564e884c6dd06 --- /dev/null +++ b/drc11/3i5nocf6/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 23472, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0120176640/model b/drc11/3i5nocf6/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..3e597943fa4946daef268c3481f4188a12ae8f0a --- /dev/null +++ b/drc11/3i5nocf6/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59bec43f56d1dad1481eb47a82660b4e96c57b81ddc96b599e094b8ea9c3cdd0 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0130191360/cfg.json b/drc11/3i5nocf6/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5128515ebb0215baefaa6bce28c9dc961936d69b --- /dev/null +++ b/drc11/3i5nocf6/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 25428, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0130191360/model b/drc11/3i5nocf6/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..220d122aebc19e89ef5f2e2c80375c307b79d8d5 --- /dev/null +++ b/drc11/3i5nocf6/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ce7811d1f5cef647706b0c7ba056e749765c91d8a61d6f15022473ae749ab8f +size 15803057 diff --git a/drc11/3i5nocf6/cp_0140206080/cfg.json b/drc11/3i5nocf6/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4a02eb6532ffc8cf2f0702f7b81cf3c5a33fee1f --- /dev/null +++ b/drc11/3i5nocf6/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 27384, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0140206080/model b/drc11/3i5nocf6/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..fad9947b1e7100d75abaa8839471d94507f6240a --- /dev/null +++ b/drc11/3i5nocf6/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:978a912a7cf5547d4bc0f025ba53022b99ce26c58d855f58e49a3eeefdccdf99 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0150220800/cfg.json b/drc11/3i5nocf6/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5fe0d3b79fb5e3aef2bd5eeb7924f80c049e138e --- /dev/null +++ b/drc11/3i5nocf6/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 29340, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0150220800/model b/drc11/3i5nocf6/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..a6468629039c7edb6cec8b99885179745748e4b1 --- /dev/null +++ b/drc11/3i5nocf6/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6da51e4d6d066b62568744243f3084ea9d093b6c59bda76e760d3c062b7fe8 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0160235520/cfg.json b/drc11/3i5nocf6/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3567af35c14f04eef7d7cbfc267f94cea78df842 --- /dev/null +++ b/drc11/3i5nocf6/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 31296, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0160235520/model b/drc11/3i5nocf6/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..b74563fc5dd7d0e03c5b8c312ad773f57eaa1dc3 --- /dev/null +++ b/drc11/3i5nocf6/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b768238cef2a9679a6d1dd6bc515da210d4880fe5d6e3cf4bd6d18cbb360aa +size 15803057 diff --git a/drc11/3i5nocf6/cp_0170250240/cfg.json b/drc11/3i5nocf6/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b90b5d27a02c9fd724e9c29050bc99e005f643d9 --- /dev/null +++ b/drc11/3i5nocf6/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 33252, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0170250240/model b/drc11/3i5nocf6/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..973c9ab17011b60add931e2db3e066f80732a0b2 --- /dev/null +++ b/drc11/3i5nocf6/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a81afe48c409c8d9aa67954d97a1b3db042a70fceb3745e4cdf232484b5a13fe +size 15803057 diff --git a/drc11/3i5nocf6/cp_0180264960/cfg.json b/drc11/3i5nocf6/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2273ee7e15283147ce4000b37c325b19564e26ad --- /dev/null +++ b/drc11/3i5nocf6/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 35208, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0180264960/model b/drc11/3i5nocf6/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..1587d257cb3e05cc16270cc8e9288b6f9fc2c18c --- /dev/null +++ b/drc11/3i5nocf6/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c90917d87c9c70404e4d8d95c0d211c6410c3e8fca41f95323f1a61eae0888a5 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0190279680/cfg.json b/drc11/3i5nocf6/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5eb743906e7b668563ece25a6843296ade7b18d8 --- /dev/null +++ b/drc11/3i5nocf6/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 37164, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0190279680/model b/drc11/3i5nocf6/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..77be8a3ba687d04a5784560b69b04a049dbf9fd1 --- /dev/null +++ b/drc11/3i5nocf6/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e62d6c0dc46c1a5e20754317bba0d2708d8885b408d74a437c09d0c394c66a37 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0200294400/cfg.json b/drc11/3i5nocf6/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1fae8d18901fd8db6951706ac38bf449511c26bd --- /dev/null +++ b/drc11/3i5nocf6/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 39120, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0200294400/model b/drc11/3i5nocf6/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..6b6f1a8ba8aaf3ca0b28fbec66df93d9c99a8bf9 --- /dev/null +++ b/drc11/3i5nocf6/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cbbe0dd88f69b12d46fac47d98f1dbf2d29fd29702b44d0339f2b82f3c0e687 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0300441600/cfg.json b/drc11/3i5nocf6/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d3ab0dcc3b1aafb4c2868f803ecbb2004294c679 --- /dev/null +++ b/drc11/3i5nocf6/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 58680, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0300441600/model b/drc11/3i5nocf6/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..542181a6ccf876bacad1d0313f784c9eaf9e1356 --- /dev/null +++ b/drc11/3i5nocf6/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61709bfa8a8e2c63cc7569b007417bf20548519d9c2b888bc56571546d6825dc +size 15803057 diff --git a/drc11/3i5nocf6/cp_0400588800/cfg.json b/drc11/3i5nocf6/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..118e3acd8a8e4b5ad3a78b3a26297f22141be5d9 --- /dev/null +++ b/drc11/3i5nocf6/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 78240, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0400588800/model b/drc11/3i5nocf6/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..2d70f33f8bb86bd76310a41c4294b32af487b1fb --- /dev/null +++ b/drc11/3i5nocf6/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6b6cd44cd5fd0373edc46f6cf9176bf6a43f70bce5a6f0cbef58d0f6c46ec5 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0500736000/cfg.json b/drc11/3i5nocf6/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1bf0741e5f5b943a194862bf037e226da66a85be --- /dev/null +++ b/drc11/3i5nocf6/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 97800, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0500736000/model b/drc11/3i5nocf6/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..1994ccbf2785feb040df0026cbfaa5bb5272bcc3 --- /dev/null +++ b/drc11/3i5nocf6/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebfbf915579aed3247e253d6253c0e4969fbfe76c1174643c6ac1726504f5f64 +size 15803057 diff --git a/drc11/3i5nocf6/cp_0600883200/cfg.json b/drc11/3i5nocf6/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cc35343d2b5f44422db8916e13d5f9e1531ebb09 --- /dev/null +++ b/drc11/3i5nocf6/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 117360, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0600883200/model b/drc11/3i5nocf6/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..36cced0ff8eeff2debc890e3770dad6d8f14e8aa --- /dev/null +++ b/drc11/3i5nocf6/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93622ee1fd01e3c56f69a05821ec7e3a15ffda423293d6575c74bb90b583ccd +size 15803057 diff --git a/drc11/3i5nocf6/cp_0701030400/cfg.json b/drc11/3i5nocf6/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..847e0235bad549bce4d80c358319ee6969066ec1 --- /dev/null +++ b/drc11/3i5nocf6/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 136920, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0701030400/model b/drc11/3i5nocf6/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..9f703ef1b203927d735c596822a03fe4a5b04cc3 --- /dev/null +++ b/drc11/3i5nocf6/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d64a1674986c8d5eb58cdb46e2da6a733e67c4ed4eabb490d906d7ffaf1fd7cf +size 15803057 diff --git a/drc11/3i5nocf6/cp_0801177600/cfg.json b/drc11/3i5nocf6/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e86e2437a492193a60fe6c963d591b3b9c33440e --- /dev/null +++ b/drc11/3i5nocf6/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 156480, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0801177600/model b/drc11/3i5nocf6/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..3b32af9938b3dc0db05d696851e6726c844ce7e4 --- /dev/null +++ b/drc11/3i5nocf6/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de6b34398c147d017242d460cdc3966d3605906beb530efee0c1a788358ac62b +size 15803057 diff --git a/drc11/3i5nocf6/cp_0901324800/cfg.json b/drc11/3i5nocf6/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3832e46b9037f0ad7ea40a1eabb1fe0786063250 --- /dev/null +++ b/drc11/3i5nocf6/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 176040, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_0901324800/model b/drc11/3i5nocf6/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..471cf14cd48dd36bc006d0da2cf837632c2ace4a --- /dev/null +++ b/drc11/3i5nocf6/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4e8058b326c9d7b5141e240e376c9fccb3fc7cb84ee58dbfccb28fba6ce927 +size 15803057 diff --git a/drc11/3i5nocf6/cp_1001472000/cfg.json b/drc11/3i5nocf6/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..79ab1c1354561bfb18687770141f01cf8ad7f9f3 --- /dev/null +++ b/drc11/3i5nocf6/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195600, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1001472000/model b/drc11/3i5nocf6/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..9aba5a50d1a2f9cf6fa2cfe0f6d35f24724c4d8d --- /dev/null +++ b/drc11/3i5nocf6/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62604b8fc83ff8aff2f16c78564cb30f3d32724f32a0d694bd95f544eb06a7a3 +size 15803057 diff --git a/drc11/3i5nocf6/cp_1101619200/cfg.json b/drc11/3i5nocf6/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7aa9b4590583655bc6614dc40695a5f89af35944 --- /dev/null +++ b/drc11/3i5nocf6/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 215160, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1101619200/model b/drc11/3i5nocf6/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..b3c826a129aae670605175853077e673297525e6 --- /dev/null +++ b/drc11/3i5nocf6/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e154bd926a59cf3f86bebfb5bc51b671947152d1125ab8ab42e799cf89c33c0 +size 15803057 diff --git a/drc11/3i5nocf6/cp_1201766400/cfg.json b/drc11/3i5nocf6/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c771aeeea3d8792fb264c2349d5a3e804f8bfc61 --- /dev/null +++ b/drc11/3i5nocf6/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 234720, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1201766400/model b/drc11/3i5nocf6/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..3dea3984dc63537f65a6ba266f47abac4f09f4bd --- /dev/null +++ b/drc11/3i5nocf6/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71198ed63a2b010680a81a8e75ad7584d8462cf863f45067707e33f43f662140 +size 15803057 diff --git a/drc11/3i5nocf6/cp_1301913600/cfg.json b/drc11/3i5nocf6/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2d9b0e39088bebb9939a353eceed2edb13430297 --- /dev/null +++ b/drc11/3i5nocf6/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 254280, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1301913600/model b/drc11/3i5nocf6/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..52ff24d87a5573eb8244fd58b340d2442ec6a41d --- /dev/null +++ b/drc11/3i5nocf6/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb3ffb3ac3e2f09af1fff1546cf6f0814e9cb78cbfd3e1188122b959ef7c4ea +size 15803057 diff --git a/drc11/3i5nocf6/cp_1402060800/cfg.json b/drc11/3i5nocf6/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..85883900b3327587a6c752b3fc3ea19289c5fbc3 --- /dev/null +++ b/drc11/3i5nocf6/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 273840, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1402060800/model b/drc11/3i5nocf6/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..555b3ebd767d413bd5e6e574da5549ed76ff5553 --- /dev/null +++ b/drc11/3i5nocf6/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7226aa7be0940952a1f97d1733c8c0eaf5078001bf12686a952affe964eaa0b5 +size 15803057 diff --git a/drc11/3i5nocf6/cp_1502208000/cfg.json b/drc11/3i5nocf6/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..692f054a1cb2a1231605e62d3faeb989af8ab412 --- /dev/null +++ b/drc11/3i5nocf6/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 293400, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1502208000/model b/drc11/3i5nocf6/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..5e5cba4f1a7d2ce254db12b010b103b7679202e0 --- /dev/null +++ b/drc11/3i5nocf6/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efee8b05e5f5974ee1929f3e139852ee2420d8c9f455f29fa8c50e5eebabbc9d +size 15803057 diff --git a/drc11/3i5nocf6/cp_1602355200/cfg.json b/drc11/3i5nocf6/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..529aec77087a0daa0f6423d68a2a3571a0cd7deb --- /dev/null +++ b/drc11/3i5nocf6/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 312960, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1602355200/model b/drc11/3i5nocf6/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..d237e6c34c17f684924a657c6181abc2042db66b --- /dev/null +++ b/drc11/3i5nocf6/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6234e4c8075134a01dded86c463baf597128ee2b0104568074217791c42ba47e +size 15803057 diff --git a/drc11/3i5nocf6/cp_1702502400/cfg.json b/drc11/3i5nocf6/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77ff3752fe9384198f8d6a7e75240b5539e950c7 --- /dev/null +++ b/drc11/3i5nocf6/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 332520, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1702502400/model b/drc11/3i5nocf6/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..bc921ab87007dc63476cdf7f3422123119c6fca6 --- /dev/null +++ b/drc11/3i5nocf6/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4948fd3db3c31b8a6938f9b4a85413bfd66b7ade95ab8ef6481257a1c8d64e0 +size 15803057 diff --git a/drc11/3i5nocf6/cp_1802649600/cfg.json b/drc11/3i5nocf6/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a147e1c1e5ff87e8de1870e784c76144af65f020 --- /dev/null +++ b/drc11/3i5nocf6/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 352080, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1802649600/model b/drc11/3i5nocf6/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..a759a94697a20fd6c241c8619bb2d5ff33d4fd3d --- /dev/null +++ b/drc11/3i5nocf6/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e12c7ac34735b0b08abacab216036dc29d6124ef57faad83f6f9b166919c8676 +size 15803057 diff --git a/drc11/3i5nocf6/cp_1902796800/cfg.json b/drc11/3i5nocf6/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3560ac050c3c5a6ba172769d27fab482e98b252f --- /dev/null +++ b/drc11/3i5nocf6/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 371640, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_1902796800/model b/drc11/3i5nocf6/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..3cd08aaa4a2ec32cd7c8aaa7280d690551198c64 --- /dev/null +++ b/drc11/3i5nocf6/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318d37e77e8dd0dca3cbb27f6aa34cdacd91dbb2aebe73bd01ce2cc85dc5cf17 +size 15803057 diff --git a/drc11/3i5nocf6/cp_2002944000/cfg.json b/drc11/3i5nocf6/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c4da00eddf47f9c767c56c6bda04181243c591d1 --- /dev/null +++ b/drc11/3i5nocf6/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1627461699}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 959267281, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391200, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc11/3i5nocf6/cp_2002944000/model b/drc11/3i5nocf6/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..faeb296687243ec4f4a6ebd937851743b265d698 --- /dev/null +++ b/drc11/3i5nocf6/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068f4f2af2c3622391f5f19b44c824b6581965db87bbd7459a527f0fc157c125 +size 15803057 diff --git a/drc11/eue6pax7/cp_0000998400/cfg.json b/drc11/eue6pax7/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..546b666be72503344b49903f0a750a0119c7aa03 --- /dev/null +++ b/drc11/eue6pax7/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0000998400/model b/drc11/eue6pax7/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..4d0c12b382f4fd23637854dda09bc34ab1972376 --- /dev/null +++ b/drc11/eue6pax7/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3693e27029ec4594b8d7c25577b4b520a558db66cc6d768736f806a0e0c09fcc +size 15803057 diff --git a/drc11/eue6pax7/cp_0002001920/cfg.json b/drc11/eue6pax7/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e2550319fd05b0e508c9205edc620594bea13c8d --- /dev/null +++ b/drc11/eue6pax7/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0002001920/model b/drc11/eue6pax7/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..48a2fe61c02b5dc2718f5fee71b0edef9e4279d6 --- /dev/null +++ b/drc11/eue6pax7/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c4f0b0fa3cc48f702f3e8840f4beacf2ee559b9cc4c3e0c0a25776b6b2f9005 +size 15803057 diff --git a/drc11/eue6pax7/cp_0003000320/cfg.json b/drc11/eue6pax7/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..80db1444153e5088aa04b01cd967d75f442930b2 --- /dev/null +++ b/drc11/eue6pax7/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 586, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0003000320/model b/drc11/eue6pax7/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..c6a82dcc380ee2bd35fd3ad1d460e9b2143346c2 --- /dev/null +++ b/drc11/eue6pax7/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5021b1e7cb37515cc89d634961ddfc53d8cc6a7053c92bbd9b6eb55d54277fb +size 15803057 diff --git a/drc11/eue6pax7/cp_0004003840/cfg.json b/drc11/eue6pax7/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..35cb958a1cf5a431d4d6da93d4b798db2dcb89ca --- /dev/null +++ b/drc11/eue6pax7/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 782, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0004003840/model b/drc11/eue6pax7/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..e4b449c3e4581bc66a1feb0ac1553c517f2e5293 --- /dev/null +++ b/drc11/eue6pax7/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87dce44ff478a291bc2296ff35edfd7ea3544e3f177a1a16b86a481f9c8ab19c +size 15803057 diff --git a/drc11/eue6pax7/cp_0005007360/cfg.json b/drc11/eue6pax7/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6bc90607fbde1b7233ac563f3a1d31bff55c20bf --- /dev/null +++ b/drc11/eue6pax7/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 978, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0005007360/model b/drc11/eue6pax7/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..a3e714bcdfc6cb44fa2afabe576d5a85c313c844 --- /dev/null +++ b/drc11/eue6pax7/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7dc0353736f0e88c2b1629d577c9253fdf0f1c77a37c0b611de371abc67d3a9 +size 15803057 diff --git a/drc11/eue6pax7/cp_0006005760/cfg.json b/drc11/eue6pax7/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e271845caaf18ef259ee44be955a916149ec0dd6 --- /dev/null +++ b/drc11/eue6pax7/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1173, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0006005760/model b/drc11/eue6pax7/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..1c51b7d8d0a8e4ecf1097d41139c933c640e7897 --- /dev/null +++ b/drc11/eue6pax7/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5bec1348b46b4d807a0128748645e4e097657fd1357df007fcb4b1542b006d3 +size 15803057 diff --git a/drc11/eue6pax7/cp_0007009280/cfg.json b/drc11/eue6pax7/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..37863bda5b1cec240ac3b9d6cdd77feca8f79984 --- /dev/null +++ b/drc11/eue6pax7/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1369, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0007009280/model b/drc11/eue6pax7/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..0ce9265efa9e3cbe69a7b584c4e93a295d851cfc --- /dev/null +++ b/drc11/eue6pax7/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896d134954675d4acd26ec0a0e39b1642be686b5fb0af9072f3fc6191b394f5c +size 15803057 diff --git a/drc11/eue6pax7/cp_0008007680/cfg.json b/drc11/eue6pax7/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b85840b424370b2c95a0c0d0f50f28894dda93b6 --- /dev/null +++ b/drc11/eue6pax7/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1564, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0008007680/model b/drc11/eue6pax7/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..7bbbd201d90caad656d388a37210445d102cfdd7 --- /dev/null +++ b/drc11/eue6pax7/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87443ccb6da73e6c2ef8912d533cc5251fd250eea8abbed520db1d844f7e42c +size 15803057 diff --git a/drc11/eue6pax7/cp_0009011200/cfg.json b/drc11/eue6pax7/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3b89014d830ecbd22cd9f7ebff2e3969c0b145e3 --- /dev/null +++ b/drc11/eue6pax7/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1760, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0009011200/model b/drc11/eue6pax7/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..a865add490e6b7e68a9e048623368605e50cfed6 --- /dev/null +++ b/drc11/eue6pax7/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b1a43c79b9c274e4388883b956d66c7d9dbb0e05dee92969178ced0b99a9170 +size 15803057 diff --git a/drc11/eue6pax7/cp_0010014720/cfg.json b/drc11/eue6pax7/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..baeb24104bc34a1258e23d0d037c49d9ba7c78e8 --- /dev/null +++ b/drc11/eue6pax7/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1956, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0010014720/model b/drc11/eue6pax7/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..33625db84da8d6e902cc0e860051a862c5dc445c --- /dev/null +++ b/drc11/eue6pax7/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a56cff7f595d084baf46900ef5b55b2b026464ece0373ec2b8362304468c05 +size 15803057 diff --git a/drc11/eue6pax7/cp_0011013120/cfg.json b/drc11/eue6pax7/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..56145a657027a0c07b23486fc391f01fae473c1f --- /dev/null +++ b/drc11/eue6pax7/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2151, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0011013120/model b/drc11/eue6pax7/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..8aa4048ff1dea961fc416fc4845a9ddd8c8cb280 --- /dev/null +++ b/drc11/eue6pax7/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7df34cbc987630546834ba189d977fe77bd2bb5e29e4d59e6546b0b590e7eae +size 15803057 diff --git a/drc11/eue6pax7/cp_0012016640/cfg.json b/drc11/eue6pax7/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..201170177ba28bffdd49972483da43d8a41fa59a --- /dev/null +++ b/drc11/eue6pax7/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2347, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0012016640/model b/drc11/eue6pax7/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..6dd74b0c6e7798ec793b7bc8a78b82fbe4079cb0 --- /dev/null +++ b/drc11/eue6pax7/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafb6f7442f1d58e7735f8fdfed561af0db5d0b7ffbeb1ce1675492d95bb97d7 +size 15803057 diff --git a/drc11/eue6pax7/cp_0013015040/cfg.json b/drc11/eue6pax7/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..adfafa51aa79403c15827de82c232459a3fa319d --- /dev/null +++ b/drc11/eue6pax7/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2542, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0013015040/model b/drc11/eue6pax7/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..3c798cf5f77601712e9eb590eb1ec5fe991c994a --- /dev/null +++ b/drc11/eue6pax7/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ba119c37958a1779d5217d4bbc0ef48b03b1e2857c54786368f59a40834ec4 +size 15803057 diff --git a/drc11/eue6pax7/cp_0014018560/cfg.json b/drc11/eue6pax7/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..94c2cb45daa03127b3aca799f78ed7cd73026ab2 --- /dev/null +++ b/drc11/eue6pax7/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2738, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0014018560/model b/drc11/eue6pax7/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..4a7fba4ae39a06f8022ad183403bc2e63e33d43e --- /dev/null +++ b/drc11/eue6pax7/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a25d3bfc34d6676a98dc63d5db67c6b5d4f6e5f0c5fb2c1623fe3b00869004d +size 15803057 diff --git a/drc11/eue6pax7/cp_0015022080/cfg.json b/drc11/eue6pax7/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..817b039de5209fa9f435eecf2374214335c8b80c --- /dev/null +++ b/drc11/eue6pax7/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2934, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0015022080/model b/drc11/eue6pax7/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..1955becd6b39fedddf9d6c8c4c740f8bf40f532c --- /dev/null +++ b/drc11/eue6pax7/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db16b96908b2cb5a8d722ed946b35da399a70bcb112bb5b36cfbcf253b637cf +size 15803057 diff --git a/drc11/eue6pax7/cp_0016020480/cfg.json b/drc11/eue6pax7/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..873c58ec240986b55eb6f9ac3086aa8c31a47b42 --- /dev/null +++ b/drc11/eue6pax7/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3129, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0016020480/model b/drc11/eue6pax7/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..944826c8f1fb9c8a78c9d4602a5676049c0e41ce --- /dev/null +++ b/drc11/eue6pax7/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c43f51fb16c8574b22acb38ad9cd82db76ed0fbc636ea3be6935c44078a7fe75 +size 15803057 diff --git a/drc11/eue6pax7/cp_0017024000/cfg.json b/drc11/eue6pax7/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5de98066425d27910a270ed681ea4ee511d5c065 --- /dev/null +++ b/drc11/eue6pax7/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3325, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0017024000/model b/drc11/eue6pax7/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..d3f51a9cce495635d8f364a6bd760333887c8f20 --- /dev/null +++ b/drc11/eue6pax7/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d325fd604f4c4bdd8baab172766bbbe62a7a492c16acfee7c248d7677be848d +size 15803057 diff --git a/drc11/eue6pax7/cp_0018022400/cfg.json b/drc11/eue6pax7/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8e7fd2d5c92f27ae2258504f3ab15b40cc60d03e --- /dev/null +++ b/drc11/eue6pax7/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3520, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0018022400/model b/drc11/eue6pax7/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..7211f83457c92e96e01cf18ec2d86fd7d75c19f0 --- /dev/null +++ b/drc11/eue6pax7/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d97efc7860061991d898b9988070cff8ab5fa6c1df61e88c65eebeb840d745cb +size 15803057 diff --git a/drc11/eue6pax7/cp_0019025920/cfg.json b/drc11/eue6pax7/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8a1b30920d2bd4f21d91cf24e8c45e4b392d4e2d --- /dev/null +++ b/drc11/eue6pax7/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3716, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0019025920/model b/drc11/eue6pax7/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..e8010892582040a29d96502e394cc6fa047c39c9 --- /dev/null +++ b/drc11/eue6pax7/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e96c412a1efbf19e4550153bda5d985c84fbfb151d4aa630bd7ef798ae27234 +size 15803057 diff --git a/drc11/eue6pax7/cp_0020029440/cfg.json b/drc11/eue6pax7/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..efad8cd9af2ac542046075e33dfb1d4aa4c24e94 --- /dev/null +++ b/drc11/eue6pax7/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3912, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0020029440/model b/drc11/eue6pax7/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..bc606fb32e9ec7d76ab6ec72e0bebc95f0b90640 --- /dev/null +++ b/drc11/eue6pax7/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae62ad1ec94386f6375ca278dd3a134c5acaac104315acea835177dd5c461dc2 +size 15803057 diff --git a/drc11/eue6pax7/cp_0030044160/cfg.json b/drc11/eue6pax7/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4838d3239503f82e1e6ced243f96c3bb8429217e --- /dev/null +++ b/drc11/eue6pax7/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 5868, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0030044160/model b/drc11/eue6pax7/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..25c2e02aee240c070f68279719213d1105be916a --- /dev/null +++ b/drc11/eue6pax7/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfe38cb9c650e2eaffe035899277164ae3cdc490f4eb8ac9285e895e1a35fc08 +size 15803057 diff --git a/drc11/eue6pax7/cp_0040058880/cfg.json b/drc11/eue6pax7/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..31423da7f7555ffc8b2266a22a83b88189887c66 --- /dev/null +++ b/drc11/eue6pax7/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 7824, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0040058880/model b/drc11/eue6pax7/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..6b813e8eeb5fbf5a83588fa1375a1d22a02c23c0 --- /dev/null +++ b/drc11/eue6pax7/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44b9c2862f3ddb7b95cf8c9a6146ca975f5013e0f530f324613d68844d6ce05 +size 15803057 diff --git a/drc11/eue6pax7/cp_0050073600/cfg.json b/drc11/eue6pax7/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ddf6da2f1048850039b96fbf65ce80c0a7c50ee --- /dev/null +++ b/drc11/eue6pax7/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 9780, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0050073600/model b/drc11/eue6pax7/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..96f68906137efc9937ad36ebd73c165401905df0 --- /dev/null +++ b/drc11/eue6pax7/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b43f68f1a7e699cedb098758495221fd413055d654e4adc80805ebc32c259a9 +size 15803057 diff --git a/drc11/eue6pax7/cp_0060088320/cfg.json b/drc11/eue6pax7/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..48b2d7842109fddb2773a56518912fd6ae5b1607 --- /dev/null +++ b/drc11/eue6pax7/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 11736, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0060088320/model b/drc11/eue6pax7/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..3a20c3f4ad1e9d6ec4e8bc4da28921c86f27b3ba --- /dev/null +++ b/drc11/eue6pax7/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f971defb1c01e589d856a979407f7ac70e2d4025e0481ea5ce185e49caa8ca +size 15803057 diff --git a/drc11/eue6pax7/cp_0070103040/cfg.json b/drc11/eue6pax7/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..76dca9d74caac9c23f56c1a225eb8afb0e05b170 --- /dev/null +++ b/drc11/eue6pax7/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 13692, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0070103040/model b/drc11/eue6pax7/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..fb16166b12683a1caf22ebddcc550b62f78651ee --- /dev/null +++ b/drc11/eue6pax7/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b00adee0c23545a118b1cf80d2bb2d6453be4fcac6ff3478e427f17dd49b3c9 +size 15803057 diff --git a/drc11/eue6pax7/cp_0080117760/cfg.json b/drc11/eue6pax7/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c5a8bd1d3608c3f0db335ab6b0dceb9c06b72d07 --- /dev/null +++ b/drc11/eue6pax7/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 15648, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0080117760/model b/drc11/eue6pax7/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..0a89a4fe200b15fa7ad1786780d72b86b6671514 --- /dev/null +++ b/drc11/eue6pax7/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:498c12584728b17e86b6e7971e33377859f9cb37a3b707e731fec8f00a790778 +size 15803057 diff --git a/drc11/eue6pax7/cp_0090132480/cfg.json b/drc11/eue6pax7/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..36bbd336150c07e9a0da052d7e0a28e82f0324a9 --- /dev/null +++ b/drc11/eue6pax7/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 17604, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0090132480/model b/drc11/eue6pax7/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..eb773b6bf7877be8b636f278766bae6d5f2fa7a8 --- /dev/null +++ b/drc11/eue6pax7/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a0f83303d75cb2bbed6e7fb74db0f1bf7f152250a8d399783b2b2034c73dab +size 15803057 diff --git a/drc11/eue6pax7/cp_0100147200/cfg.json b/drc11/eue6pax7/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b0f88943c73f9ca9ca56153311ef798543d32d55 --- /dev/null +++ b/drc11/eue6pax7/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 19560, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0100147200/model b/drc11/eue6pax7/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..54b41d9dac3b70131755196dd6eb5d7218a3e8b5 --- /dev/null +++ b/drc11/eue6pax7/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e833ee72ac822d482002078d8eafc41ecf93aa51265b5116663083f0a7660ba9 +size 15803057 diff --git a/drc11/eue6pax7/cp_0110161920/cfg.json b/drc11/eue6pax7/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d675b7d41acc50c81f048ca29a628a0b2844cb17 --- /dev/null +++ b/drc11/eue6pax7/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 21516, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0110161920/model b/drc11/eue6pax7/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..6b9d0513797916f9929df89f1fbc861cb674f82c --- /dev/null +++ b/drc11/eue6pax7/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7518289fcaecd56fa3c645d8d36bf0e63a4d02d146b5b285d9673df5a7f429 +size 15803057 diff --git a/drc11/eue6pax7/cp_0120176640/cfg.json b/drc11/eue6pax7/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ca89dc6964278beb1e6e562a1b321085305758da --- /dev/null +++ b/drc11/eue6pax7/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 23472, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0120176640/model b/drc11/eue6pax7/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..636a2256299be63840d9dc1611e61e07d9a4cc7c --- /dev/null +++ b/drc11/eue6pax7/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4a62b47e5466f623cca4fc9cc67e981369afb52af11687e84d74e7f19632e1 +size 15803057 diff --git a/drc11/eue6pax7/cp_0130191360/cfg.json b/drc11/eue6pax7/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..206b657cda83ecbcf803267dfcd46935b91a9a74 --- /dev/null +++ b/drc11/eue6pax7/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 25428, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0130191360/model b/drc11/eue6pax7/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..c9df419309d3f5599c83100f3209f71ff76f1815 --- /dev/null +++ b/drc11/eue6pax7/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c8508591f74c7aefb1f8d7159201b094236bcd78499924b52c408911662a896 +size 15803057 diff --git a/drc11/eue6pax7/cp_0140206080/cfg.json b/drc11/eue6pax7/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cb5df550bc196e38e43e9495d14965dba3b8020f --- /dev/null +++ b/drc11/eue6pax7/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 27384, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0140206080/model b/drc11/eue6pax7/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..4f772d013b4293ac58b78528555a845693987978 --- /dev/null +++ b/drc11/eue6pax7/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23467890f2bfce87d2e2474dcb41893bc615960d70a8f888a05fa7148241915 +size 15803057 diff --git a/drc11/eue6pax7/cp_0150220800/cfg.json b/drc11/eue6pax7/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0efbec409c75119633c5f871e56f30f36cfd1e38 --- /dev/null +++ b/drc11/eue6pax7/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 29340, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0150220800/model b/drc11/eue6pax7/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..e37b20ea7e8d96013e8286eb3a8177e7e5ddabc0 --- /dev/null +++ b/drc11/eue6pax7/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baa17cfe12ce668fc7e38e2e4e30bcb45d8730000233508ac1de6da7bd9265b3 +size 15803057 diff --git a/drc11/eue6pax7/cp_0160235520/cfg.json b/drc11/eue6pax7/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ecfd0c25c7fbb084d34e7d80de400fa7b6b510c7 --- /dev/null +++ b/drc11/eue6pax7/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 31296, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0160235520/model b/drc11/eue6pax7/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..674d8070b7ebc12c950dbec7b3fdd721d0d396e2 --- /dev/null +++ b/drc11/eue6pax7/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e62886c8730d66493de7adb970f1bbe5c159a94eae05a29a6fa750bd94c3bdd3 +size 15803057 diff --git a/drc11/eue6pax7/cp_0170250240/cfg.json b/drc11/eue6pax7/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6dea37669f6fcbeb78a0e095d3a3f7050fe587e1 --- /dev/null +++ b/drc11/eue6pax7/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 33252, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0170250240/model b/drc11/eue6pax7/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..bd7014f2cd176f49f8f1b62185a84022e529c937 --- /dev/null +++ b/drc11/eue6pax7/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5975d460ebd589be4d2adbf037a3fa1efc82d95dca90d72c78b8c17573d4f38b +size 15803057 diff --git a/drc11/eue6pax7/cp_0180264960/cfg.json b/drc11/eue6pax7/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..888e5848c4e6a140d8561d9d3704222cb2642cba --- /dev/null +++ b/drc11/eue6pax7/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 35208, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0180264960/model b/drc11/eue6pax7/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..c5c2684d7bbf3031c6b08c920375e27a6f76372d --- /dev/null +++ b/drc11/eue6pax7/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cfda6faa4e7ad8ee8e3f92eff88ea523f7ae0118cffcac9137fec1e6afdf297 +size 15803057 diff --git a/drc11/eue6pax7/cp_0190279680/cfg.json b/drc11/eue6pax7/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2fd54aa9880155cc281a4f3b892645a0b57e85 --- /dev/null +++ b/drc11/eue6pax7/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 37164, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0190279680/model b/drc11/eue6pax7/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..0a00d8a1a8629a3a1d4c63f91ebb6ea50d4aa495 --- /dev/null +++ b/drc11/eue6pax7/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5206910d35dec78a4f5f177900e09b91b84549761f1b57ad663a1f236fb5008 +size 15803057 diff --git a/drc11/eue6pax7/cp_0200294400/cfg.json b/drc11/eue6pax7/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0828429cbdc546fe60cb1cb3059ce7871c41678b --- /dev/null +++ b/drc11/eue6pax7/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 39120, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0200294400/model b/drc11/eue6pax7/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..20b0078bb6a0e8abac27164db15a879db0b5494f --- /dev/null +++ b/drc11/eue6pax7/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4155815c674a04ddd34010ae9a0b21042df1616cf7f1555bff27c5ff3498f40a +size 15803057 diff --git a/drc11/eue6pax7/cp_0300441600/cfg.json b/drc11/eue6pax7/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4c3edf67bf79824d8e5d6a9f704e5cc9201ff720 --- /dev/null +++ b/drc11/eue6pax7/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 58680, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0300441600/model b/drc11/eue6pax7/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..7697169fe9f6ec3bae035efa834451a2e8f84a80 --- /dev/null +++ b/drc11/eue6pax7/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c645df2bb9766de5b079501f7863cd04229f61e2a9313e9a28e17f9f071a6b87 +size 15803057 diff --git a/drc11/eue6pax7/cp_0400588800/cfg.json b/drc11/eue6pax7/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..02e446682938b645cb525c9c9e87154e308ae018 --- /dev/null +++ b/drc11/eue6pax7/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 78240, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0400588800/model b/drc11/eue6pax7/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..1835c9fc30bb2da0821480ba138241666d5f4c9b --- /dev/null +++ b/drc11/eue6pax7/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7958269d9f178d713208fc98ac8d693470a7a39e6bb37287185a720a5e85a9b9 +size 15803057 diff --git a/drc11/eue6pax7/cp_0500736000/cfg.json b/drc11/eue6pax7/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5eba1857360c3a2cea3247461c5d6923ffa9dab2 --- /dev/null +++ b/drc11/eue6pax7/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 97800, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0500736000/model b/drc11/eue6pax7/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..76914c79faeed37f484c7ff19bb9acce8ef56732 --- /dev/null +++ b/drc11/eue6pax7/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d089733efbcc107e45e5600f585c33cf9f323a1213feff588fa23fb90925c9 +size 15803057 diff --git a/drc11/eue6pax7/cp_0600883200/cfg.json b/drc11/eue6pax7/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..532fe43483bc909b4ff36d4fee8a87b1cec55f84 --- /dev/null +++ b/drc11/eue6pax7/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 117360, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0600883200/model b/drc11/eue6pax7/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..a967c44ebcb6876fb521dbdea9c417999728aec1 --- /dev/null +++ b/drc11/eue6pax7/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5456695ff343aed7a43989f66f65e3efaa629d425061d481322ac477e5ae1dd +size 15803057 diff --git a/drc11/eue6pax7/cp_0701030400/cfg.json b/drc11/eue6pax7/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6e109af7567b0dcb0a7b284dcc0c01728be81180 --- /dev/null +++ b/drc11/eue6pax7/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 136920, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0701030400/model b/drc11/eue6pax7/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..90ab533e4e53308030ba31c864784250ea2b1f77 --- /dev/null +++ b/drc11/eue6pax7/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e50a57bb3613a0c968ffb7bd09582c321fb5866d5b97d3fd6b450b6466572922 +size 15803057 diff --git a/drc11/eue6pax7/cp_0801177600/cfg.json b/drc11/eue6pax7/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..17f71dbbca9c807b060c8b3c705b4c3593b4f23b --- /dev/null +++ b/drc11/eue6pax7/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 156480, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0801177600/model b/drc11/eue6pax7/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..38d3047a5ec8c9dd68c6d6761415a043fce5eca9 --- /dev/null +++ b/drc11/eue6pax7/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a23945b1f297a8a044fe8549063b66174143c34288433d039456688531e9f72 +size 15803057 diff --git a/drc11/eue6pax7/cp_0901324800/cfg.json b/drc11/eue6pax7/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2eae9b9a6f1f002757306e4f8f2e905db8ed3547 --- /dev/null +++ b/drc11/eue6pax7/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 176040, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_0901324800/model b/drc11/eue6pax7/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..d5d340194e290974cd7fd3fc1bdaf594a7f90011 --- /dev/null +++ b/drc11/eue6pax7/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6266b6bde4b0710265b4539d260c3f8f8d1a0b46a1ad6a129c4b952635c8019 +size 15803057 diff --git a/drc11/eue6pax7/cp_1001472000/cfg.json b/drc11/eue6pax7/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0f9a985a1dd6d8d118001d7787ac9528eddae59d --- /dev/null +++ b/drc11/eue6pax7/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195600, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1001472000/model b/drc11/eue6pax7/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..790c53bde86972fcc5d7ad240d4f938d34d31491 --- /dev/null +++ b/drc11/eue6pax7/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb6a887f29fc4f44dfd9199241d34a4d21c0041e40798f88dcbe2b24495faa9 +size 15803057 diff --git a/drc11/eue6pax7/cp_1101619200/cfg.json b/drc11/eue6pax7/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..63c30162dfa344dd57f0be54f77ca67dcf7fd3aa --- /dev/null +++ b/drc11/eue6pax7/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 215160, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1101619200/model b/drc11/eue6pax7/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..bc81cc2a0c7c4de1f8bb66d1063de5b73d7dd264 --- /dev/null +++ b/drc11/eue6pax7/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d8ff34da3603a43567f3ef45551e40098abd68330d5e585d5333da06bcf7cf +size 15803057 diff --git a/drc11/eue6pax7/cp_1201766400/cfg.json b/drc11/eue6pax7/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3a69d6438ce76c486ae2e1149ec9614035eb01dd --- /dev/null +++ b/drc11/eue6pax7/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 234720, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1201766400/model b/drc11/eue6pax7/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..65f06ce55b43317f9c71a8992b7f2b779d836ddb --- /dev/null +++ b/drc11/eue6pax7/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:676955cec5fc399c2b0dd3c6e9701e10e658eb0badbbdc5279b6e27809b8a11f +size 15803057 diff --git a/drc11/eue6pax7/cp_1301913600/cfg.json b/drc11/eue6pax7/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8f79588f3c2be16885f37a0800b49b9f217c0cd1 --- /dev/null +++ b/drc11/eue6pax7/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 254280, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1301913600/model b/drc11/eue6pax7/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..594ab277f08f37958d887bf0ae4913c249b437d1 --- /dev/null +++ b/drc11/eue6pax7/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ae43c61393dc158370012bb5a18c0070f0415b1a53b4accf2d8741810670b3 +size 15803057 diff --git a/drc11/eue6pax7/cp_1402060800/cfg.json b/drc11/eue6pax7/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1e5b57407760e6d6a16023ce261d12ce0930cff9 --- /dev/null +++ b/drc11/eue6pax7/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 273840, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1402060800/model b/drc11/eue6pax7/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..59bc717010f9f15d710a9194a22f13e77ee06b4c --- /dev/null +++ b/drc11/eue6pax7/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352eebecf4c371e0778d290fc2c6c3a98a5681958598d9a774eb3ef7abf75f2f +size 15803057 diff --git a/drc11/eue6pax7/cp_1502208000/cfg.json b/drc11/eue6pax7/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..273ac56caab56e182540ec0b5b8b776883892cfa --- /dev/null +++ b/drc11/eue6pax7/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 293400, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1502208000/model b/drc11/eue6pax7/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..88f32c1a91fd687ca5fe4c378ed2530908298d71 --- /dev/null +++ b/drc11/eue6pax7/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcb157d2766cb038ec9d2893ff6e470351e7071dd15be4d772563804a093f88a +size 15803057 diff --git a/drc11/eue6pax7/cp_1602355200/cfg.json b/drc11/eue6pax7/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e3c241cf324ce90ffd0919390ff328301ea59edd --- /dev/null +++ b/drc11/eue6pax7/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 312960, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1602355200/model b/drc11/eue6pax7/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..ffe00a5a54ffea093c4ce82ff1024588c6fd9400 --- /dev/null +++ b/drc11/eue6pax7/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab05b4fa1eff8a5ebfb6290bec8cbcabec981b29f6e3c1cd040a43153918ca94 +size 15803057 diff --git a/drc11/eue6pax7/cp_1702502400/cfg.json b/drc11/eue6pax7/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..49bfc69ca495d2e188b60eb151b7cdeae5212002 --- /dev/null +++ b/drc11/eue6pax7/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 332520, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1702502400/model b/drc11/eue6pax7/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..61b56b149c644b41121fea0f26c31950cf75f4aa --- /dev/null +++ b/drc11/eue6pax7/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a52e736f585d1e97c3382bea76efa593d9672f28035da6d289c13f5efa85ade9 +size 15803057 diff --git a/drc11/eue6pax7/cp_1802649600/cfg.json b/drc11/eue6pax7/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..452761339ce37828a41e7f91fb00418d21caad70 --- /dev/null +++ b/drc11/eue6pax7/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 352080, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1802649600/model b/drc11/eue6pax7/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..5c95b02ecfb45d0d09edc7696e06276e9053e332 --- /dev/null +++ b/drc11/eue6pax7/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:225d2421b2cbf86f9792cf0a58fd03c16c6965735eef4a0deed7521d4d9143f4 +size 15803057 diff --git a/drc11/eue6pax7/cp_1902796800/cfg.json b/drc11/eue6pax7/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..73e446be4c607f37fa9515a78a726b0bdc457544 --- /dev/null +++ b/drc11/eue6pax7/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 371640, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_1902796800/model b/drc11/eue6pax7/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..a8fca603a553f1963a2fbb37748a7018a7056a44 --- /dev/null +++ b/drc11/eue6pax7/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebad1e91ba21efa8449addd4bb420019a9da73c19aa23f08eeb38ea6df57c556 +size 15803057 diff --git a/drc11/eue6pax7/cp_2002944000/cfg.json b/drc11/eue6pax7/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..43fd99bef201519f816c28f1fcbfbbee33d38c40 --- /dev/null +++ b/drc11/eue6pax7/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 709101444}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 249823845, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391200, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc11/eue6pax7/cp_2002944000/model b/drc11/eue6pax7/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..db5f12a85f6a3917c542fb1634ff18045aab129c --- /dev/null +++ b/drc11/eue6pax7/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42eb95ae04cab23d2cc650a42c278bf44ddf1ba16de98704d894b7c159961275 +size 15803057 diff --git a/drc11/nom9jda6/cp_0000998400/cfg.json b/drc11/nom9jda6/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ec4a3185da1d2f884356821556d9c6f0427e811c --- /dev/null +++ b/drc11/nom9jda6/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0000998400/model b/drc11/nom9jda6/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..56497ca81ba30d14c5e50449d219874417a3eb7a --- /dev/null +++ b/drc11/nom9jda6/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:944c70eaca0db05e5f1685223bd9a32c3671917d381e5338779cb6df9c75f9ac +size 15803057 diff --git a/drc11/nom9jda6/cp_0002001920/cfg.json b/drc11/nom9jda6/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6c7cb7aed72bc80131b5c6b4b81fe9fc17042a8c --- /dev/null +++ b/drc11/nom9jda6/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0002001920/model b/drc11/nom9jda6/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..d4db92ce0cd2484de721fd074461e5214eee360d --- /dev/null +++ b/drc11/nom9jda6/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a8c6442bf3d88a4c38424e2d517c315e382cacc71aea093f4bed654a7ecc1a +size 15803057 diff --git a/drc11/nom9jda6/cp_0003000320/cfg.json b/drc11/nom9jda6/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c91422301e824deb266e2fdc45e08eb9dc59d4de --- /dev/null +++ b/drc11/nom9jda6/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 586, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0003000320/model b/drc11/nom9jda6/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..13ee485502c3e0a44c6520e3ae3dbe825469acd1 --- /dev/null +++ b/drc11/nom9jda6/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56982d85c60adf84f98ac71f71142c6727e64204e71473fd3e7ace18e64f61fc +size 15803057 diff --git a/drc11/nom9jda6/cp_0004003840/cfg.json b/drc11/nom9jda6/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2987e02759154ea9fbc4f5053ab893752eb273bb --- /dev/null +++ b/drc11/nom9jda6/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 782, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0004003840/model b/drc11/nom9jda6/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..129200055354351f7dbfc218b97299e728d2ceed --- /dev/null +++ b/drc11/nom9jda6/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b3659dbcba4d7f4d12129af2a3ae5937fb16f815e3453855e6069428dfabcf +size 15803057 diff --git a/drc11/nom9jda6/cp_0005007360/cfg.json b/drc11/nom9jda6/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5fe1b15b57005bbe8a5fc076e0b1e554552bd365 --- /dev/null +++ b/drc11/nom9jda6/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 978, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0005007360/model b/drc11/nom9jda6/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..0e85fe103ea856c42cf19da5582d207a229dd388 --- /dev/null +++ b/drc11/nom9jda6/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce738daad565b401156347a5167c3b963961519bf70bdbcc7bb9eef84a479f57 +size 15803057 diff --git a/drc11/nom9jda6/cp_0006005760/cfg.json b/drc11/nom9jda6/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..022ca16315de5c0c5c346f751a76ec40de8704ca --- /dev/null +++ b/drc11/nom9jda6/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1173, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0006005760/model b/drc11/nom9jda6/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..414293df8f16e05f36077886a3d2fb35e0248df8 --- /dev/null +++ b/drc11/nom9jda6/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432e3c2e02489569d29a77aaf700f5d1d6e91748f990918861d863bfb210961d +size 15803057 diff --git a/drc11/nom9jda6/cp_0007009280/cfg.json b/drc11/nom9jda6/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4bf196fbe44b425c3b2c6d1f57a17f9bb9f82b38 --- /dev/null +++ b/drc11/nom9jda6/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1369, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0007009280/model b/drc11/nom9jda6/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..ee82ad2eb48390df106cbb89d8bd2437f8007d23 --- /dev/null +++ b/drc11/nom9jda6/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa1e14c6865ed47af6faa4ff6b5119d63ce60e2bf132bd45218918b6324626a1 +size 15803057 diff --git a/drc11/nom9jda6/cp_0008007680/cfg.json b/drc11/nom9jda6/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8a0e97e99456b5daf3b92eb0401185f605aee592 --- /dev/null +++ b/drc11/nom9jda6/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1564, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0008007680/model b/drc11/nom9jda6/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..71e31595b53dd89ddf7cdbbbb37a2e0cdc527727 --- /dev/null +++ b/drc11/nom9jda6/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09692f2966ab82e45474283f564c35a6c8c0f3e55737e25523cbf8cccb50d660 +size 15803057 diff --git a/drc11/nom9jda6/cp_0009011200/cfg.json b/drc11/nom9jda6/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ae08ab3cd08ec68ce3520c2708f250b7265cb5c5 --- /dev/null +++ b/drc11/nom9jda6/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1760, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0009011200/model b/drc11/nom9jda6/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..df8dd6e05b5d0438e7adc1efde72a2b7f3473e71 --- /dev/null +++ b/drc11/nom9jda6/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48893c783cf61aeb616ed35b7a673999ad6cf9fd7b9756646826de3a52483ec5 +size 15803057 diff --git a/drc11/nom9jda6/cp_0010014720/cfg.json b/drc11/nom9jda6/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..36d1e1a14419bffcb8d098998e1aa59d0d2f6f0a --- /dev/null +++ b/drc11/nom9jda6/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1956, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0010014720/model b/drc11/nom9jda6/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..ee80c3b3b0fdc64fea3f507d9c2499b1d2d801ce --- /dev/null +++ b/drc11/nom9jda6/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9fea058f323d6bb7a55e0ae16ae5805c7c4eb86e2cd8d7d118bb6656961ca3a +size 15803057 diff --git a/drc11/nom9jda6/cp_0011013120/cfg.json b/drc11/nom9jda6/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f51040b209659d21b0425a03372a49e6f066d17f --- /dev/null +++ b/drc11/nom9jda6/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2151, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0011013120/model b/drc11/nom9jda6/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..5b9c78b37fc71a520f9cae6c00b4cc143da11eeb --- /dev/null +++ b/drc11/nom9jda6/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c0ae2f2ea980e7b213a08eb7f2334ebbf79ddfd81f982add6a6218f15290bbf +size 15803057 diff --git a/drc11/nom9jda6/cp_0012016640/cfg.json b/drc11/nom9jda6/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8475224e8fb00c61a882fae1ffe0c75658058583 --- /dev/null +++ b/drc11/nom9jda6/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2347, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0012016640/model b/drc11/nom9jda6/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..df3a50c7abec2cdd5fe9c9ee5f881a996f172b95 --- /dev/null +++ b/drc11/nom9jda6/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439a2b27de50448b69d49b0579e13aa94e8f8b26c5f3319a209b94f6c5835c0f +size 15803057 diff --git a/drc11/nom9jda6/cp_0013015040/cfg.json b/drc11/nom9jda6/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..adf5fcb47917856cc40f09e77c059c6c90f7ac45 --- /dev/null +++ b/drc11/nom9jda6/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2542, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0013015040/model b/drc11/nom9jda6/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..fd6dec8b56aadc7147fca137d7d8618775657a8e --- /dev/null +++ b/drc11/nom9jda6/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02152becb5188fb87d8b2f1c401b3f0103dee5e5bb6b9289ff29b960ccabcd9e +size 15803057 diff --git a/drc11/nom9jda6/cp_0014018560/cfg.json b/drc11/nom9jda6/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..34300559f364cfd53a4537eec70b2f32c74d936c --- /dev/null +++ b/drc11/nom9jda6/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2738, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0014018560/model b/drc11/nom9jda6/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..d2f992a68bed9e3588de0ebfc8bbc749a1446b97 --- /dev/null +++ b/drc11/nom9jda6/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4b53a39e85339bbcf0cfc9317e71a9380a1e0889587c0b13693862b1ca4f13 +size 15803057 diff --git a/drc11/nom9jda6/cp_0015022080/cfg.json b/drc11/nom9jda6/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..27b64676f242167335a5a8c91b95b169073e1416 --- /dev/null +++ b/drc11/nom9jda6/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2934, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0015022080/model b/drc11/nom9jda6/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..03e20ce3b9f967b336063209173e0faee5115022 --- /dev/null +++ b/drc11/nom9jda6/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4a27e1ed4fed1bd290dd49cdd00baeff7ac76636bfede79d3ac6cc730213d4 +size 15803057 diff --git a/drc11/nom9jda6/cp_0016020480/cfg.json b/drc11/nom9jda6/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cdccef07d3243ae8098de23cbb66f020ce6cfc37 --- /dev/null +++ b/drc11/nom9jda6/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3129, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0016020480/model b/drc11/nom9jda6/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..a83cbe30a14e8d3c2be925e05c1d0c76eb6302a4 --- /dev/null +++ b/drc11/nom9jda6/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18edb766ff5250d93d84c57a124065b0e6d9e1ca41e5c24f916ee56883f4f84a +size 15803057 diff --git a/drc11/nom9jda6/cp_0017024000/cfg.json b/drc11/nom9jda6/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..434e3c43f562993b123c432096ab89c5f857b3cf --- /dev/null +++ b/drc11/nom9jda6/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3325, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0017024000/model b/drc11/nom9jda6/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..e1b1607c60a244513b9ac3e6232d43a7fd7d364b --- /dev/null +++ b/drc11/nom9jda6/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1dbc149dab8fe5a5ec2fe7faa7861f738dfad868e536ff52d48c7a38852e6e8 +size 15803057 diff --git a/drc11/nom9jda6/cp_0018022400/cfg.json b/drc11/nom9jda6/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2d0ba04fa712cdd5bcce739724d24c2e9d3cfe97 --- /dev/null +++ b/drc11/nom9jda6/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3520, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0018022400/model b/drc11/nom9jda6/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..3a6cd6e73780a02b425207e033a4b2dbf45f04de --- /dev/null +++ b/drc11/nom9jda6/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c1c0dd1be5965aef93a59f2f13c9c1c157514e2dfdc3e4d301ed541992a53f7 +size 15803057 diff --git a/drc11/nom9jda6/cp_0019025920/cfg.json b/drc11/nom9jda6/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0cb1df659509ea6d5c25a09f5950f9669d34cf58 --- /dev/null +++ b/drc11/nom9jda6/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3716, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0019025920/model b/drc11/nom9jda6/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..4fb50e71f99cd0bbd379ca553d59e1f2049e0392 --- /dev/null +++ b/drc11/nom9jda6/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c04bc44eade9f95920c18b93b1fb94f36faef2be350e40bc98d8d0dbb370661e +size 15803057 diff --git a/drc11/nom9jda6/cp_0020029440/cfg.json b/drc11/nom9jda6/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dc8eb3a459648be709ef388356e0a5de5229c655 --- /dev/null +++ b/drc11/nom9jda6/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3912, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0020029440/model b/drc11/nom9jda6/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..b21704c0ac1b40caef658fcf5984c4e5dd29afff --- /dev/null +++ b/drc11/nom9jda6/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a59b0f54167b9d376f69e15759e80944e8fa679ef24931213538292ffbc3af +size 15803057 diff --git a/drc11/nom9jda6/cp_0030044160/cfg.json b/drc11/nom9jda6/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dd1333ca46c79ca475f1dff9eddaff2bb40dca81 --- /dev/null +++ b/drc11/nom9jda6/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 5868, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0030044160/model b/drc11/nom9jda6/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..4bd8fc0889d3f60223b3c1dc02fb7a8ad873e3f1 --- /dev/null +++ b/drc11/nom9jda6/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2dcd2b2241658be4f4df19994c0e16ea5d47c6943487f1673690ce30ae6e8e7 +size 15803057 diff --git a/drc11/nom9jda6/cp_0040058880/cfg.json b/drc11/nom9jda6/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..af28ce54f412bdb921e12070be80df8ce064149c --- /dev/null +++ b/drc11/nom9jda6/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 7824, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0040058880/model b/drc11/nom9jda6/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..c2951caa22110aa0c3fd709f877a8443ab1536bf --- /dev/null +++ b/drc11/nom9jda6/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c975390f96d1f5717bddbf4cf78ec1458fa474b4daa13f0ea7801afe87a9d9a +size 15803057 diff --git a/drc11/nom9jda6/cp_0050073600/cfg.json b/drc11/nom9jda6/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9429da6260d71dcb2c663c3fcd776077b37a4c89 --- /dev/null +++ b/drc11/nom9jda6/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 9780, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0050073600/model b/drc11/nom9jda6/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..6ed548240a8e5db63f305ac8a11d5e896bf35bd6 --- /dev/null +++ b/drc11/nom9jda6/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12649df68966b982d3c524d05e8c440ffdc975cc1ab63cad9546ded1c57b976 +size 15803057 diff --git a/drc11/nom9jda6/cp_0060088320/cfg.json b/drc11/nom9jda6/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a8d7efbef6d70a220d36a04f623e7c326c7242b2 --- /dev/null +++ b/drc11/nom9jda6/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 11736, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0060088320/model b/drc11/nom9jda6/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..30f9649e1e92a70338d9112ff0670fb3216c490c --- /dev/null +++ b/drc11/nom9jda6/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e92eb894236c81b185418c05abe4cc4c363097c4b1d41abea4549ed54cbbc67 +size 15803057 diff --git a/drc11/nom9jda6/cp_0070103040/cfg.json b/drc11/nom9jda6/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..08e38d16c0d3029112c3b08ea770ed202790d2c7 --- /dev/null +++ b/drc11/nom9jda6/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 13692, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0070103040/model b/drc11/nom9jda6/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..3406bdc1059ec5fe62814628ac9c2a030d81f7bc --- /dev/null +++ b/drc11/nom9jda6/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f46341731b57b56fd7312b5eabfa2f7a8aab690af4d5e8f5450af3cdfd00ba +size 15803057 diff --git a/drc11/nom9jda6/cp_0080117760/cfg.json b/drc11/nom9jda6/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7295e470abd5c032d8213c6a3233d1894ad8c345 --- /dev/null +++ b/drc11/nom9jda6/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 15648, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0080117760/model b/drc11/nom9jda6/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..001c6eb69e7f71620c3e7eb5462bf02275aea626 --- /dev/null +++ b/drc11/nom9jda6/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85e4b4a80b1b0e45d42a0df35d62345c7034866a2bd011b867b2f864ce1cbff7 +size 15803057 diff --git a/drc11/nom9jda6/cp_0090132480/cfg.json b/drc11/nom9jda6/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..31b019ba81d166b6f027355081c1fcaffbc80c62 --- /dev/null +++ b/drc11/nom9jda6/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 17604, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0090132480/model b/drc11/nom9jda6/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..7a844bd72dd87f810e4650e08f8cbce7ac78b486 --- /dev/null +++ b/drc11/nom9jda6/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be4575c53335a08668f593f4f0ebbd2c428c6e18b617caefa2b5e8401e90eb34 +size 15803057 diff --git a/drc11/nom9jda6/cp_0100147200/cfg.json b/drc11/nom9jda6/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8c358d7e531ad40a992c43759b1f2d01011c6596 --- /dev/null +++ b/drc11/nom9jda6/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 19560, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0100147200/model b/drc11/nom9jda6/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..eb01494a8d64579a7ba81d60269215df54a406c5 --- /dev/null +++ b/drc11/nom9jda6/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3d8fb99967e14e09099efede77dfa87e985f930535224300b3430c2dad89c1 +size 15803057 diff --git a/drc11/nom9jda6/cp_0110161920/cfg.json b/drc11/nom9jda6/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..79584c2c7799b8c2913a604073cdcac7d77ae890 --- /dev/null +++ b/drc11/nom9jda6/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 21516, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0110161920/model b/drc11/nom9jda6/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..69d4b63f06abb5a5fa2615e100cd123e1c19b226 --- /dev/null +++ b/drc11/nom9jda6/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be3932731c8221cc6095367c5e8b0f518419b34aa99d4e4bbfbc4c12c0995195 +size 15803057 diff --git a/drc11/nom9jda6/cp_0120176640/cfg.json b/drc11/nom9jda6/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..eaf08b93f5f981f1359f4ae50726fb25e8ead58b --- /dev/null +++ b/drc11/nom9jda6/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 23472, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0120176640/model b/drc11/nom9jda6/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..2c506c2957286dd725aa8066675308b5578ca23b --- /dev/null +++ b/drc11/nom9jda6/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e3fdc0cc0b117faf5fb6b00df9a80c6e76a6bfe55ca618ed53623dafd23371 +size 15803057 diff --git a/drc11/nom9jda6/cp_0130191360/cfg.json b/drc11/nom9jda6/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cd27a2738c06dc288d02d80e263d36cec7d61783 --- /dev/null +++ b/drc11/nom9jda6/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 25428, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0130191360/model b/drc11/nom9jda6/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..8fac4946d5c830424fd8a4ce9888947f91b64f65 --- /dev/null +++ b/drc11/nom9jda6/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5470fa19588c0750d5abd02bd55028ba06149af1ffa7b1b7b94655b35d8d74dc +size 15803057 diff --git a/drc11/nom9jda6/cp_0140206080/cfg.json b/drc11/nom9jda6/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c3b9e603839ffbf021442c2fcfb22a1fed09acee --- /dev/null +++ b/drc11/nom9jda6/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 27384, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0140206080/model b/drc11/nom9jda6/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..eec04c658696a2164dafbdb3b80ff3d79233c201 --- /dev/null +++ b/drc11/nom9jda6/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c136ec4b9355bc708ad3b3849426af6d43e63ba1331685bb7770329d8449967d +size 15803057 diff --git a/drc11/nom9jda6/cp_0150220800/cfg.json b/drc11/nom9jda6/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..10bbba698511c81254a02db705d727ec96702f0a --- /dev/null +++ b/drc11/nom9jda6/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 29340, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0150220800/model b/drc11/nom9jda6/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..ca66ab9ed5c179b716923b19c3fbeb15a7987e36 --- /dev/null +++ b/drc11/nom9jda6/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1832653399584a29962fd16b198b0fdfca1d580b34bd53f19d6cdbd3d529bd3e +size 15803057 diff --git a/drc11/nom9jda6/cp_0160235520/cfg.json b/drc11/nom9jda6/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4cdf3545cde84d4fbd20f5b704537c018e23ff0f --- /dev/null +++ b/drc11/nom9jda6/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 31296, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0160235520/model b/drc11/nom9jda6/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..941362d797ddd225f8957ec5637fe18e29835031 --- /dev/null +++ b/drc11/nom9jda6/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e59c08fceadbfe6b311bdd5cbabc7f495a0bf54a392d6e47af30f644dc3c84a8 +size 15803057 diff --git a/drc11/nom9jda6/cp_0170250240/cfg.json b/drc11/nom9jda6/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ae3d50a4549a533aab7f3cf3ce243130a9e74a29 --- /dev/null +++ b/drc11/nom9jda6/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 33252, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0170250240/model b/drc11/nom9jda6/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..c325830c516cfc771d7f8539539ef65e430fb9d6 --- /dev/null +++ b/drc11/nom9jda6/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6f542a4433b3bcd55e6f8e7c2b99867084ca5d5978353bdf121c847720f3d4 +size 15803057 diff --git a/drc11/nom9jda6/cp_0180264960/cfg.json b/drc11/nom9jda6/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19a03e03c0a6f476e739b581e60afce65cdcf01c --- /dev/null +++ b/drc11/nom9jda6/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 35208, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0180264960/model b/drc11/nom9jda6/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..dda09fafc833c91af056f5c498c5fb1c0e8a4c95 --- /dev/null +++ b/drc11/nom9jda6/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a34f54beff5305b2f9dbaa3fbc32c1838c0f6df52ca58073da6dcc02a7f26a4 +size 15803057 diff --git a/drc11/nom9jda6/cp_0190279680/cfg.json b/drc11/nom9jda6/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bf9a60cd6d00e759bc006750623df4fac888d158 --- /dev/null +++ b/drc11/nom9jda6/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 37164, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0190279680/model b/drc11/nom9jda6/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..7ae6ce3c8ad38fe6d5830a497514ad971ec1813c --- /dev/null +++ b/drc11/nom9jda6/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:077e68ff38d582aac42cbf4afdd4c1f63a172463d0895d5cb757fadb4337908c +size 15803057 diff --git a/drc11/nom9jda6/cp_0200294400/cfg.json b/drc11/nom9jda6/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d4fe71e903796bc7956eb03a1ac2ea8bcf9dbb0f --- /dev/null +++ b/drc11/nom9jda6/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 39120, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0200294400/model b/drc11/nom9jda6/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..9480b327cfe65db761185c1d3a30d22f52cf5d6c --- /dev/null +++ b/drc11/nom9jda6/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c829d57cf5fb551b03e9111be90ae78baa39117fa08edb3d8ebbf3370ce03e +size 15803057 diff --git a/drc11/nom9jda6/cp_0300441600/cfg.json b/drc11/nom9jda6/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d27485d9f9a3010e1cabdcf3ef8e9278b7bb32eb --- /dev/null +++ b/drc11/nom9jda6/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 58680, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0300441600/model b/drc11/nom9jda6/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..df1545c59465da74bc7789ab85922a7f0249fd61 --- /dev/null +++ b/drc11/nom9jda6/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4efe86b7bfad8152d20d7746331b6b843dfd172f2ef81c17f8abc74091fdf8d +size 15803057 diff --git a/drc11/nom9jda6/cp_0400588800/cfg.json b/drc11/nom9jda6/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..31d06ae868460734bcf66c8002d7a89a9961b3af --- /dev/null +++ b/drc11/nom9jda6/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 78240, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0400588800/model b/drc11/nom9jda6/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..e2976dda4e0ba640785b283c202a7e6bfc6dbe4c --- /dev/null +++ b/drc11/nom9jda6/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24438d6fc52131db488a252480193bcaa5d9478d93ac7a07ae9c1c4c258cf0d5 +size 15803057 diff --git a/drc11/nom9jda6/cp_0500736000/cfg.json b/drc11/nom9jda6/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d176e12197828c61b4bd24003be5d6acd4e02736 --- /dev/null +++ b/drc11/nom9jda6/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 97800, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0500736000/model b/drc11/nom9jda6/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..6374c681533b9df5a24411192b797b0f37ac0b97 --- /dev/null +++ b/drc11/nom9jda6/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99760108b743c80f35f4ac485815edd35b59ca3c7a9da62493927016b3211bab +size 15803057 diff --git a/drc11/nom9jda6/cp_0600883200/cfg.json b/drc11/nom9jda6/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0d4e0f639de23878beb4b302e6c3ade9658637f4 --- /dev/null +++ b/drc11/nom9jda6/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 117360, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0600883200/model b/drc11/nom9jda6/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..2ad41a3b0c275e5b55ecafc1e2e7cdc6264d00a9 --- /dev/null +++ b/drc11/nom9jda6/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d3951c6a69e55d970f778f2c8a2b664d4b652fef2aca1de0f139846ad049f0 +size 15803057 diff --git a/drc11/nom9jda6/cp_0701030400/cfg.json b/drc11/nom9jda6/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3950bd9c26229ba07cf8c68862cdb84490e6311c --- /dev/null +++ b/drc11/nom9jda6/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 136920, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0701030400/model b/drc11/nom9jda6/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..9159e1f8b454d7d435a95801535de941b48222e2 --- /dev/null +++ b/drc11/nom9jda6/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5009505dcb0567ff28b8dff2939efe510560db8edfdca49bef95ff3424184b5b +size 15803057 diff --git a/drc11/nom9jda6/cp_0801177600/cfg.json b/drc11/nom9jda6/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..26972d580a71683b5c4924bfbc33bffe9f3998f0 --- /dev/null +++ b/drc11/nom9jda6/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 156480, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0801177600/model b/drc11/nom9jda6/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..06c830bea8e48fb12cfda895ee6cdde879941a9b --- /dev/null +++ b/drc11/nom9jda6/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e27e0386e60c10cd28b40e8f679cf9ddd0354a6eb23c18ff24acd1b3d4c3abd5 +size 15803057 diff --git a/drc11/nom9jda6/cp_0901324800/cfg.json b/drc11/nom9jda6/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1bf63199cd0da0288bc5ff976a1bc484d5b99603 --- /dev/null +++ b/drc11/nom9jda6/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 176040, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_0901324800/model b/drc11/nom9jda6/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..040399aa1d59a17c33f2578315788ec1910db6ac --- /dev/null +++ b/drc11/nom9jda6/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e22b37ffe315a2141d2abd20c2a8e93476fd53c435d1cab6a0e61c4a12a35826 +size 15803057 diff --git a/drc11/nom9jda6/cp_1001472000/cfg.json b/drc11/nom9jda6/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0f7fc156ec61fc3f22ba5c14831fa5d7a93faace --- /dev/null +++ b/drc11/nom9jda6/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195600, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1001472000/model b/drc11/nom9jda6/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..e0e4f6b798c6b7974992b9f455d1ef9835c52fd3 --- /dev/null +++ b/drc11/nom9jda6/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69889ce69c65b97e566262995e403d2851d7f9c97beb20194fc3c0782842653f +size 15803057 diff --git a/drc11/nom9jda6/cp_1101619200/cfg.json b/drc11/nom9jda6/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5a298446b390c5d4018ec40f6bf3eaeea6cfbc17 --- /dev/null +++ b/drc11/nom9jda6/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 215160, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1101619200/model b/drc11/nom9jda6/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..be4253809eb62f2d18c1afb960fde9e4ecc89246 --- /dev/null +++ b/drc11/nom9jda6/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1ab7726d59c8ef1a29e6ae4047c9ff54a66aeff2910de17235da91e1a07384c +size 15803057 diff --git a/drc11/nom9jda6/cp_1201766400/cfg.json b/drc11/nom9jda6/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9db20b61d820667a2895e4be2fa081ec880ae975 --- /dev/null +++ b/drc11/nom9jda6/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 234720, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1201766400/model b/drc11/nom9jda6/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..e636cb3a65baaffc9c2d43a7eb6cfb196b749f57 --- /dev/null +++ b/drc11/nom9jda6/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72017d08444cc1eb6e65db20d934eeea6c226b992d1e44ae312151ee40678170 +size 15803057 diff --git a/drc11/nom9jda6/cp_1301913600/cfg.json b/drc11/nom9jda6/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cab2d28e56f6b9b5a6db077002096ead70abbee4 --- /dev/null +++ b/drc11/nom9jda6/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 254280, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1301913600/model b/drc11/nom9jda6/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..53920495f5da558701048868172cb92a2ece12a4 --- /dev/null +++ b/drc11/nom9jda6/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a79a3441e7a6d02993279e2f35849213d223d2461d2e28e9f1134fd6553523dd +size 15803057 diff --git a/drc11/nom9jda6/cp_1402060800/cfg.json b/drc11/nom9jda6/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..979f1d1f5431000cc5a381ac9958e327a4e1cff2 --- /dev/null +++ b/drc11/nom9jda6/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 273840, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1402060800/model b/drc11/nom9jda6/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..eba2a89155321daf8937d6c4d3852180a2fad0b8 --- /dev/null +++ b/drc11/nom9jda6/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:226e6add91b93305dbd96f7ed825266c576a4046c6bfe5ad3c9b62f632b39f08 +size 15803057 diff --git a/drc11/nom9jda6/cp_1502208000/cfg.json b/drc11/nom9jda6/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dda07b7bd67b1bb150c2a84a9037fcdd4f6cd14b --- /dev/null +++ b/drc11/nom9jda6/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 293400, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1502208000/model b/drc11/nom9jda6/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..d32392dfaf2dadb14f0dca272c2c6a7f92d850eb --- /dev/null +++ b/drc11/nom9jda6/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1290d0e0597cf9ddaa9aedcaebb3738678bf3808ba5ed89a4ea10c39273508f +size 15803057 diff --git a/drc11/nom9jda6/cp_1602355200/cfg.json b/drc11/nom9jda6/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..834dadc82e23a2702d9210996f7fa408a8b5d255 --- /dev/null +++ b/drc11/nom9jda6/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 312960, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1602355200/model b/drc11/nom9jda6/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..91a063ceb2cdc9df5fa6637ea9407fd5046e064e --- /dev/null +++ b/drc11/nom9jda6/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d99aa2de57abc86fbfa5d0f6fc0472bb9798161324189e2b83011d6c43b8671 +size 15803057 diff --git a/drc11/nom9jda6/cp_1702502400/cfg.json b/drc11/nom9jda6/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8a33461ce549180e77a0c9f67972b869bb8aa63f --- /dev/null +++ b/drc11/nom9jda6/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 332520, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1702502400/model b/drc11/nom9jda6/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..4ad00afa12035a51e57d723c35ddadd337011c55 --- /dev/null +++ b/drc11/nom9jda6/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a72f13718f08c5c950ca5c2f0327a31c40fbbead629d0a8eafec7cfcc75c7c9 +size 15803057 diff --git a/drc11/nom9jda6/cp_1802649600/cfg.json b/drc11/nom9jda6/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..53056a0d51f1cce1773f4b59fcf34754fcb551c6 --- /dev/null +++ b/drc11/nom9jda6/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 352080, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1802649600/model b/drc11/nom9jda6/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..af34f4dc7e42ff361b75601ef00b399288c72fb6 --- /dev/null +++ b/drc11/nom9jda6/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb61178c2e0e8d703f9a9d1598d5d01f67af9e6a302ae7247072cf598eb5f8cf +size 15803057 diff --git a/drc11/nom9jda6/cp_1902796800/cfg.json b/drc11/nom9jda6/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..49910f2ac4d15cc6057700e4405af7d7f7e84400 --- /dev/null +++ b/drc11/nom9jda6/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 371640, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_1902796800/model b/drc11/nom9jda6/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..3f27361fc156e9a8d55ef584fde5a65b871d9362 --- /dev/null +++ b/drc11/nom9jda6/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:133f3bcfe0c767fbc7026ef203c5ce37466c98a628ec78131b71762898b3523f +size 15803057 diff --git a/drc11/nom9jda6/cp_2002944000/cfg.json b/drc11/nom9jda6/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1fb30edfd651e26236c5882905b1f8a0f1b993ac --- /dev/null +++ b/drc11/nom9jda6/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 780865547}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 453192545, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391200, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc11/nom9jda6/cp_2002944000/model b/drc11/nom9jda6/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..f0520a615f404da6f7d41c4d8c19d493e7c78097 --- /dev/null +++ b/drc11/nom9jda6/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f18e34fa3b0c8f8742143741597e61753278972ad2eafad800ee7643ccadc9 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0000998400/cfg.json b/drc11/v2fm2qze/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cc13f9b19c18c7ea535c5aecbc8f915b7533d4e2 --- /dev/null +++ b/drc11/v2fm2qze/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0000998400/model b/drc11/v2fm2qze/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..c0d941a8e84f277ac221327ede470a8c5f2194c8 --- /dev/null +++ b/drc11/v2fm2qze/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4220d255aa6833347effeb8a569cf8db002b43eeee966966ad28d6eefdff016c +size 15803057 diff --git a/drc11/v2fm2qze/cp_0002001920/cfg.json b/drc11/v2fm2qze/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9b296d1c538163627416004aa22e623d8fbd611b --- /dev/null +++ b/drc11/v2fm2qze/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0002001920/model b/drc11/v2fm2qze/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..8f2393bf4eb5adae85bc6452c22b2e5e52e4554d --- /dev/null +++ b/drc11/v2fm2qze/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6eccc43019ae2da3045ec0dd85b724ee7ceaf5a5622d98740458723466af6fb +size 15803057 diff --git a/drc11/v2fm2qze/cp_0003000320/cfg.json b/drc11/v2fm2qze/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cac215dde202df10bb368742dab44f0a952cdf89 --- /dev/null +++ b/drc11/v2fm2qze/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 586, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0003000320/model b/drc11/v2fm2qze/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..77db7bb354fdc42105dc7c295f4ca799ce01b4b3 --- /dev/null +++ b/drc11/v2fm2qze/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:130ff6fbaca7b97c91ec98f4e57a8f3b83e2a78a9d34775dc3fb6e167d89cc78 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0004003840/cfg.json b/drc11/v2fm2qze/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b36461f1a0a06d365cb4e0f3b78a4f10a13416fb --- /dev/null +++ b/drc11/v2fm2qze/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 782, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0004003840/model b/drc11/v2fm2qze/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..da4b22ddd5dc3df46feb08c379f44f2b79f158b8 --- /dev/null +++ b/drc11/v2fm2qze/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c758090bbed1ac3af4b35bd70a48acaa838d8c4ebcaaeb0be2ae661d3e2b8212 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0005007360/cfg.json b/drc11/v2fm2qze/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e24c421932ac21e677a6f286d70c4aa98dca8f31 --- /dev/null +++ b/drc11/v2fm2qze/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 978, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0005007360/model b/drc11/v2fm2qze/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..2c4d89cfe86892e34b32801e28c0e0ffd48d7aa6 --- /dev/null +++ b/drc11/v2fm2qze/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c4c7850ece9bc03b2e5bc72ae508db724589e0532827bd7b991634bdb39005 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0006005760/cfg.json b/drc11/v2fm2qze/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ff685d8f8a98d4a158042d3d8d4913daead4203e --- /dev/null +++ b/drc11/v2fm2qze/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1173, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0006005760/model b/drc11/v2fm2qze/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..252d4c36dbc49ad3656b04d6bf4aa19fd93cc4dd --- /dev/null +++ b/drc11/v2fm2qze/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a8cf832d18063ffb9925923d21094aa681ed44ffb5cd3f33894d4d2219a8b89 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0007009280/cfg.json b/drc11/v2fm2qze/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2aa7c0fcac0a42476692ad8336fdd30f890339c0 --- /dev/null +++ b/drc11/v2fm2qze/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1369, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0007009280/model b/drc11/v2fm2qze/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..d085fdda91d755f843caf93b7bc56e6ee9a09e9c --- /dev/null +++ b/drc11/v2fm2qze/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54460df3632541c5171bebfb299a19cdba4021495623fe42096bbd90687c041e +size 15803057 diff --git a/drc11/v2fm2qze/cp_0008007680/cfg.json b/drc11/v2fm2qze/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..caf321da91cd4b0fbb3d4a5d85483464cbb7107b --- /dev/null +++ b/drc11/v2fm2qze/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1564, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0008007680/model b/drc11/v2fm2qze/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..748314cd0b8aea5bdd5e21212a826ad3f6ee9e31 --- /dev/null +++ b/drc11/v2fm2qze/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68378e05f7b36bdba47c7a8e6e60dd5265052599cc7ba289e4d5007d58f9bb7 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0009011200/cfg.json b/drc11/v2fm2qze/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c85ac6db658c618d10ce6955c37c3161688debcb --- /dev/null +++ b/drc11/v2fm2qze/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1760, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0009011200/model b/drc11/v2fm2qze/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..ea540d278dbc0deb8a1e26f001630a154b5a020e --- /dev/null +++ b/drc11/v2fm2qze/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b745b3f761260fdc18075c17cbcf7a789e4caf1dd8915a0a1730cda213148be5 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0010014720/cfg.json b/drc11/v2fm2qze/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a9f1328719b953da60cf85840af858900054c137 --- /dev/null +++ b/drc11/v2fm2qze/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 1956, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0010014720/model b/drc11/v2fm2qze/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..0fb31cf1efe46904af6f1f2fe46ce498a35dbfda --- /dev/null +++ b/drc11/v2fm2qze/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb25b1ef7b0a03013292a0091ae090c9faebb42316fb2867b76c41e5a1b96683 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0011013120/cfg.json b/drc11/v2fm2qze/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fd076ae8656f08fa2a9370ccf9248a60ed47f53a --- /dev/null +++ b/drc11/v2fm2qze/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2151, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0011013120/model b/drc11/v2fm2qze/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..d37231855516461a112abe8cf179ba0cffa7768c --- /dev/null +++ b/drc11/v2fm2qze/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3f7022a5a7511c2f0de1cd7ae5e9768f464bc670c8de0790fb000fde01afcc +size 15803057 diff --git a/drc11/v2fm2qze/cp_0012016640/cfg.json b/drc11/v2fm2qze/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3566e3be063d09edf7df09d6e3fdd550199e5d7e --- /dev/null +++ b/drc11/v2fm2qze/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2347, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0012016640/model b/drc11/v2fm2qze/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..634fa99d4f66812628d1e22370e189f73616834e --- /dev/null +++ b/drc11/v2fm2qze/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5b82984b3ad5075172c3ce3f671ff772a1546f6a93b1c811463786451e1fb4 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0013015040/cfg.json b/drc11/v2fm2qze/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5ee44f6b82d7ca55a575051880abb73a407dcfda --- /dev/null +++ b/drc11/v2fm2qze/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2542, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0013015040/model b/drc11/v2fm2qze/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..7a651771a3949f80fff8979a1b9bfd4c38811496 --- /dev/null +++ b/drc11/v2fm2qze/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24605508d09f1b4493d6767de6805c256347786ede1a3f6ecacca168b7e38f91 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0014018560/cfg.json b/drc11/v2fm2qze/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f8192cb26a5c8f2dfb253860c1b9aea496413240 --- /dev/null +++ b/drc11/v2fm2qze/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2738, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0014018560/model b/drc11/v2fm2qze/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..53b67319aec16adf939cd5fa5cffde0c45ad9050 --- /dev/null +++ b/drc11/v2fm2qze/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c64c578d809c7c4f800de9814fd42166001e0df79922b8b161aebbedb1f338d +size 15803057 diff --git a/drc11/v2fm2qze/cp_0015022080/cfg.json b/drc11/v2fm2qze/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d97761f1d06a08ae76051834d89c784656a4c31c --- /dev/null +++ b/drc11/v2fm2qze/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 2934, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0015022080/model b/drc11/v2fm2qze/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..08bd2bb0e01c3626bc85e6e529225a89653a5202 --- /dev/null +++ b/drc11/v2fm2qze/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325e132a6d958d84df1e5ef02a66b1bb058e6937137df6aa49088f39dd2bf921 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0016020480/cfg.json b/drc11/v2fm2qze/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b3bc0f5cdee59d2d4e49c3d9c6d6cfdfaddc9962 --- /dev/null +++ b/drc11/v2fm2qze/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3129, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0016020480/model b/drc11/v2fm2qze/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..6adb77e4be250bd883264271a2cccf1c2f46c6ba --- /dev/null +++ b/drc11/v2fm2qze/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feafd9486e1db6aee9a52e5ec965691a7d48cb3576867e46532f33fd759499db +size 15803057 diff --git a/drc11/v2fm2qze/cp_0017024000/cfg.json b/drc11/v2fm2qze/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e4a1f5a06e0211bf90349360576b10c24e854c11 --- /dev/null +++ b/drc11/v2fm2qze/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3325, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0017024000/model b/drc11/v2fm2qze/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..68eb4a8020df155e840421d1b0d3963e99b0e414 --- /dev/null +++ b/drc11/v2fm2qze/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66bed443e13b79beb1259338057b3beae9f81a838d42d71018c688ab92240343 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0018022400/cfg.json b/drc11/v2fm2qze/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f0fa143fa95f65ac54d6b9f2743f614137f5e3f8 --- /dev/null +++ b/drc11/v2fm2qze/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3520, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0018022400/model b/drc11/v2fm2qze/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..fc1ad646067c940cc04c20fca8d1eebf2894fbf7 --- /dev/null +++ b/drc11/v2fm2qze/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a8877535b30b72bc933a0cba5e33032bc78148103db5db20522adff04d696c4 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0019025920/cfg.json b/drc11/v2fm2qze/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1382dd03425448cbbbf7a7dd7c0bae630e7f7cb9 --- /dev/null +++ b/drc11/v2fm2qze/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3716, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0019025920/model b/drc11/v2fm2qze/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..8f842ecf6607745113c5cadb46b3c0759a336019 --- /dev/null +++ b/drc11/v2fm2qze/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a713cc51976205799dab603f665cb5faee848d5ba5ab3d9f5a0b7776382882f0 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0020029440/cfg.json b/drc11/v2fm2qze/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1b36f9bb502d6ce03f878c7d1f3f82183f3775bb --- /dev/null +++ b/drc11/v2fm2qze/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 3912, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0020029440/model b/drc11/v2fm2qze/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..e2c68a3c36dd4957d79c6304a9aed41ca9829830 --- /dev/null +++ b/drc11/v2fm2qze/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b68a815809fe5203b8bd100beb29c0c259175ff1755ede61109a06d41a77ac5 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0030044160/cfg.json b/drc11/v2fm2qze/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6f5cc9946b707bfca50565b5babbda62f69aaec1 --- /dev/null +++ b/drc11/v2fm2qze/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 5868, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0030044160/model b/drc11/v2fm2qze/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..98bfeb08f13799d72b1044819628b7985431a1f8 --- /dev/null +++ b/drc11/v2fm2qze/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a4b9c0a1294345a4ffd4561c96aca2b33ee08348dda8e6c5d2c5dd4235c0e8a +size 15803057 diff --git a/drc11/v2fm2qze/cp_0040058880/cfg.json b/drc11/v2fm2qze/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e2611632b504144c7ebaa46e50382edff6d4451a --- /dev/null +++ b/drc11/v2fm2qze/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 7824, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0040058880/model b/drc11/v2fm2qze/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..efbb77056c7dc05603ade1d44b9d139ca3f96787 --- /dev/null +++ b/drc11/v2fm2qze/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8771181150d664fa19c33ed1b80f34042809d759895a107e839968e822b5905f +size 15803057 diff --git a/drc11/v2fm2qze/cp_0050073600/cfg.json b/drc11/v2fm2qze/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ff5576352570a3d758b9ce2187f9f750002b7e30 --- /dev/null +++ b/drc11/v2fm2qze/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 9780, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0050073600/model b/drc11/v2fm2qze/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..5e72024e6baf7e786f48ce5da63a5430cdfd65ae --- /dev/null +++ b/drc11/v2fm2qze/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a75c414e627366f02fd7ddfa49dc1632492153b13a96f26e68543594b3192121 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0060088320/cfg.json b/drc11/v2fm2qze/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ada9597505c0169a51e3d2ae8e65864be397827e --- /dev/null +++ b/drc11/v2fm2qze/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 11736, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0060088320/model b/drc11/v2fm2qze/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..3d57ba505ea2ee4732bd139840ae4c63f031eb0c --- /dev/null +++ b/drc11/v2fm2qze/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39b4baf1ecb976a55f6644012ba1f3c1d5df88102ad981563d802b7fc293003 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0070103040/cfg.json b/drc11/v2fm2qze/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d8089d333daf00d143c28bc9c3245f50944d85e8 --- /dev/null +++ b/drc11/v2fm2qze/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 13692, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0070103040/model b/drc11/v2fm2qze/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..78c34935d78ba23fb5c761818e8125eabebfb819 --- /dev/null +++ b/drc11/v2fm2qze/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db570c6ce21dd190c4e4dc6adddb2ce5ebe6a60f57b0d4eaf597a291d96d12fe +size 15803057 diff --git a/drc11/v2fm2qze/cp_0080117760/cfg.json b/drc11/v2fm2qze/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..44acb160fb16949fe824f586f193685105b819c0 --- /dev/null +++ b/drc11/v2fm2qze/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 15648, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0080117760/model b/drc11/v2fm2qze/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..bc8be66ab0a5b167f6394482c5b8561b83686487 --- /dev/null +++ b/drc11/v2fm2qze/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2724c454023b1352bdcfe82da94324775260230f3e2126feba1f1fd8639480d8 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0090132480/cfg.json b/drc11/v2fm2qze/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5a2f0a0c596bb9402adf46e69123e4fe375b0205 --- /dev/null +++ b/drc11/v2fm2qze/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 17604, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0090132480/model b/drc11/v2fm2qze/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..63af51aefbef9aa1a3b8eedaafdf8d8b080f3958 --- /dev/null +++ b/drc11/v2fm2qze/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd188a8d25a8118c4ccd89695a8a4d785f28b01eb39ad51e439726867faa4f5a +size 15803057 diff --git a/drc11/v2fm2qze/cp_0100147200/cfg.json b/drc11/v2fm2qze/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7dff3b94c390d45ac9c0a1b01ae3dea0465cccb9 --- /dev/null +++ b/drc11/v2fm2qze/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 19560, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0100147200/model b/drc11/v2fm2qze/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..5df33c8c246fdc19514281bbcfa8fe600b778308 --- /dev/null +++ b/drc11/v2fm2qze/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b46462cd0b5ef28092e12f89d93ee5a87893694d966972d0c33e69031853a016 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0110161920/cfg.json b/drc11/v2fm2qze/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ef3cc0166118530221cf034d2a6f59e7449a3640 --- /dev/null +++ b/drc11/v2fm2qze/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 21516, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0110161920/model b/drc11/v2fm2qze/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..8b53a75ac6f56742257ff262347aa36758969440 --- /dev/null +++ b/drc11/v2fm2qze/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5d24765230db86a70d4978c2767bc5a6434e3ecae501a350bd274efd8c568e9 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0120176640/cfg.json b/drc11/v2fm2qze/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b75c77a4ad6e83aea730e4c16f74a2567f086aed --- /dev/null +++ b/drc11/v2fm2qze/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 23472, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0120176640/model b/drc11/v2fm2qze/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..4fdf48396d78f67e7a79e84305e687db0f8ba267 --- /dev/null +++ b/drc11/v2fm2qze/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b199924f1c42e39759d8553a522a0a483361bd3fd197c2024254e4133b91e99 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0130191360/cfg.json b/drc11/v2fm2qze/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f509ef346de273e6c73e12e47754d28f57707118 --- /dev/null +++ b/drc11/v2fm2qze/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 25428, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0130191360/model b/drc11/v2fm2qze/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..405bcb477c0e1fb91e15734dcef1f43e8e9905c7 --- /dev/null +++ b/drc11/v2fm2qze/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3fcacae26675888cf9ce784204d2d9648465aca7a4e9573e49d2bbf223e4e6 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0140206080/cfg.json b/drc11/v2fm2qze/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..de0b2f6ea6b2372c463c39d14a945b929351f7ff --- /dev/null +++ b/drc11/v2fm2qze/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 27384, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0140206080/model b/drc11/v2fm2qze/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..e911801f39f3d92be22ed682e4f55db932923125 --- /dev/null +++ b/drc11/v2fm2qze/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:600575af3f9e2abd58dfa03eec4032fb4d9ea4c6842fd61c779863d1e1b1110c +size 15803057 diff --git a/drc11/v2fm2qze/cp_0150220800/cfg.json b/drc11/v2fm2qze/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cf2d942fe24af5adbb98c1666a8242c085343525 --- /dev/null +++ b/drc11/v2fm2qze/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 29340, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0150220800/model b/drc11/v2fm2qze/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..0fa35d56a3bd03eb4470fd35dce3b68de2da1c4c --- /dev/null +++ b/drc11/v2fm2qze/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe289315a0962676d1b4a0ea75846d8f261199044f94742929aaace2f724138e +size 15803057 diff --git a/drc11/v2fm2qze/cp_0160235520/cfg.json b/drc11/v2fm2qze/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..20d2d14e0477cfff63eaca9ae5d6dbe40481d411 --- /dev/null +++ b/drc11/v2fm2qze/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 31296, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0160235520/model b/drc11/v2fm2qze/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..eec1d733a96d4ecfa9f3736e324c51394bc181f1 --- /dev/null +++ b/drc11/v2fm2qze/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99834074ce138af4d4fe24d2919b3d8af3127ac69013dbab028f3f16b663711d +size 15803057 diff --git a/drc11/v2fm2qze/cp_0170250240/cfg.json b/drc11/v2fm2qze/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c90f4e799da09f8a5a357b188634e38fd8ff2514 --- /dev/null +++ b/drc11/v2fm2qze/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 33252, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0170250240/model b/drc11/v2fm2qze/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..a4969f7b7dc41e50dd6f5f87e8687e4de9b92c7b --- /dev/null +++ b/drc11/v2fm2qze/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e53d0e3a565ff4a6af0addd7b79f8391df1226640d86840ed710e88d1cef0e22 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0180264960/cfg.json b/drc11/v2fm2qze/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..edd96b7cade04303e8e12f72a0ea3e67c58da6d2 --- /dev/null +++ b/drc11/v2fm2qze/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 35208, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0180264960/model b/drc11/v2fm2qze/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..200ac94df966a4bc035f181d4e6f235d13b76648 --- /dev/null +++ b/drc11/v2fm2qze/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db84074c901dd67d268921ff2c106f547548b15851e6b6a41d5fe764b49dcbb9 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0190279680/cfg.json b/drc11/v2fm2qze/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5dbedaf26b211157a2c2a368866781d41645b2ae --- /dev/null +++ b/drc11/v2fm2qze/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 37164, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0190279680/model b/drc11/v2fm2qze/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..2d3ffd643b7c1c7d0d597bb77f414650ae172598 --- /dev/null +++ b/drc11/v2fm2qze/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f977636cb1724e7ab8dde8f58de33ae390efb61cbba54a796a2541d5a7d12e05 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0200294400/cfg.json b/drc11/v2fm2qze/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5c25ca8202a1584e76551f1b4aabc39e7ba08f46 --- /dev/null +++ b/drc11/v2fm2qze/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 39120, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0200294400/model b/drc11/v2fm2qze/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..df9f3a20215c8763fb0d80c5c862c3d1a0287a42 --- /dev/null +++ b/drc11/v2fm2qze/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f37a70ac7509b4a653cb95d1cd2c55deed5da9967ddeaa9fe4588c2f3ffa23d +size 15803057 diff --git a/drc11/v2fm2qze/cp_0300441600/cfg.json b/drc11/v2fm2qze/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..381906bf4cf2a359452fdbdb6ea0470784dd30ac --- /dev/null +++ b/drc11/v2fm2qze/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 58680, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0300441600/model b/drc11/v2fm2qze/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..2d63f89fc5eeac429f373c1972a90262457e8c37 --- /dev/null +++ b/drc11/v2fm2qze/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7568d166d4957de26b55ec72aa7e1b73a53a754026793e44eba4f7558be4a7 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0400588800/cfg.json b/drc11/v2fm2qze/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..01557848020fa00e0a84282b2ea387f9e86c1647 --- /dev/null +++ b/drc11/v2fm2qze/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 78240, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0400588800/model b/drc11/v2fm2qze/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..04df1b9658d5e8ce9e40a94753406707cc73106e --- /dev/null +++ b/drc11/v2fm2qze/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b74625df3e542fec8faa76e6441c24b0227684c64d9c2fadba6414add1c3778 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0500736000/cfg.json b/drc11/v2fm2qze/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..07793a440c57fedb80925795a32dc312a0e0f86b --- /dev/null +++ b/drc11/v2fm2qze/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 97800, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0500736000/model b/drc11/v2fm2qze/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..c91cfdb6910be4655ae034fdbe288b3aa6762742 --- /dev/null +++ b/drc11/v2fm2qze/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3d8b58103664806125d6c360b7374799f3d66a1b93b748a8269d99d0e7a8c6 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0600883200/cfg.json b/drc11/v2fm2qze/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8f5ba0d3e1304275f04b34dba45c213a3a823cad --- /dev/null +++ b/drc11/v2fm2qze/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 117360, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0600883200/model b/drc11/v2fm2qze/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..f8c23fd69a77588c1ceb48b188d762e8e0afb1ed --- /dev/null +++ b/drc11/v2fm2qze/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66aae3c92f6df8a9c6fdc503b1dea74578226218a59e6a2187c0280063a8d1b9 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0701030400/cfg.json b/drc11/v2fm2qze/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c35da226e9fc33cc98f2bb939393a11acd787d07 --- /dev/null +++ b/drc11/v2fm2qze/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 136920, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0701030400/model b/drc11/v2fm2qze/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..e9d78e402ec1bff2803d2aad255abdfade46fee8 --- /dev/null +++ b/drc11/v2fm2qze/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323f92b9ab13c52a9c4bfeb41e3fa9c8754fb2be3a4999ac50095198bb1bd8eb +size 15803057 diff --git a/drc11/v2fm2qze/cp_0801177600/cfg.json b/drc11/v2fm2qze/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..acb1b2b8acbe33b1550788efb616fa46413fab12 --- /dev/null +++ b/drc11/v2fm2qze/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 156480, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0801177600/model b/drc11/v2fm2qze/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..fe87c9c291efbf7a47b5868ac4532f7e292f8b1b --- /dev/null +++ b/drc11/v2fm2qze/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7eddd638899dba15479c560495dae9d870988feed4fc805ba4b9f88b49fd979 +size 15803057 diff --git a/drc11/v2fm2qze/cp_0901324800/cfg.json b/drc11/v2fm2qze/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e10fae3d83219abbea3aaa3e50eb3fa463fb26ac --- /dev/null +++ b/drc11/v2fm2qze/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 176040, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_0901324800/model b/drc11/v2fm2qze/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..4d451263618c32a064e5ede55c5b4a92e2a435dc --- /dev/null +++ b/drc11/v2fm2qze/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:042658e74991b6c92cf6f9f2c6f446ba4ccb52550d417f5fbd8d7edaab4bb413 +size 15803057 diff --git a/drc11/v2fm2qze/cp_1001472000/cfg.json b/drc11/v2fm2qze/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c8b30504b39415dfc9fcaf302ee6f0ed7aad1308 --- /dev/null +++ b/drc11/v2fm2qze/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 195600, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1001472000/model b/drc11/v2fm2qze/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..87dfe6291f73cbfb5c5e44e38ae04c42e29d4f7d --- /dev/null +++ b/drc11/v2fm2qze/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:661624b63e6ae4f101524669d41a9de53acab3ace06534e5637a4e5e0c959603 +size 15803057 diff --git a/drc11/v2fm2qze/cp_1101619200/cfg.json b/drc11/v2fm2qze/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6d62df1598424209aa0b8a6438b4b144d4bcbc1b --- /dev/null +++ b/drc11/v2fm2qze/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 215160, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1101619200/model b/drc11/v2fm2qze/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..08ec8ca46d5627ae806335a9646cdd00e01827c3 --- /dev/null +++ b/drc11/v2fm2qze/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539fd4a976a94a700318a9b70f0a38949117c394292459883e28d1c503829aa0 +size 15803057 diff --git a/drc11/v2fm2qze/cp_1201766400/cfg.json b/drc11/v2fm2qze/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4ab5dd5624ac7f8cf2158e5742a656d012f4d117 --- /dev/null +++ b/drc11/v2fm2qze/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 234720, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1201766400/model b/drc11/v2fm2qze/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..8c59c6685abb032a7b57bf654782106975fc3f62 --- /dev/null +++ b/drc11/v2fm2qze/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d69e82117e0f6033f812891102c023214bbe698e3111e6b0216316f6825aa03 +size 15803057 diff --git a/drc11/v2fm2qze/cp_1301913600/cfg.json b/drc11/v2fm2qze/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bcced7d994c42abb775825a3bfd5118281d29cd6 --- /dev/null +++ b/drc11/v2fm2qze/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 254280, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1301913600/model b/drc11/v2fm2qze/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..0d031b42bb6944cbc2c5e0c9c8f76f34763b36a2 --- /dev/null +++ b/drc11/v2fm2qze/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f5bac8aaa9f2bdd062b4d5b3bb8ae37cae06123b1b4ed98965b44eb7fa75aaa +size 15803057 diff --git a/drc11/v2fm2qze/cp_1402060800/cfg.json b/drc11/v2fm2qze/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ab905e3c5edcb6f2dadb6ea525fd48e5eaee2380 --- /dev/null +++ b/drc11/v2fm2qze/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 273840, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1402060800/model b/drc11/v2fm2qze/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..e76a117c5e61a50b97b4b4f1fd317fcbd1b7445b --- /dev/null +++ b/drc11/v2fm2qze/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8dc32e8fbff205dbba5e9ee463218e3b91d065a38cb05fd967ee1f247d92af2 +size 15803057 diff --git a/drc11/v2fm2qze/cp_1502208000/cfg.json b/drc11/v2fm2qze/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ed158a279cf7db7a71f2b982a4f6799e9cd96c36 --- /dev/null +++ b/drc11/v2fm2qze/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 293400, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1502208000/model b/drc11/v2fm2qze/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..d45a6d75ffb63e1e8b4c69343fae76275305c945 --- /dev/null +++ b/drc11/v2fm2qze/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7475afc28c158b9d66b68f2abe471259c7df0cf5f956f4e4c4f362821a61d9 +size 15803057 diff --git a/drc11/v2fm2qze/cp_1602355200/cfg.json b/drc11/v2fm2qze/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..161d6db8550503148220a0da1aa8aae06a1d70d8 --- /dev/null +++ b/drc11/v2fm2qze/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 312960, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1602355200/model b/drc11/v2fm2qze/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..b0316245249fcae32317539c3c1b86fc9b9d49b5 --- /dev/null +++ b/drc11/v2fm2qze/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cca57c4a83e6cbb9d3b9ffb68314abda5f4439232cca32b85364308120601c4 +size 15803057 diff --git a/drc11/v2fm2qze/cp_1702502400/cfg.json b/drc11/v2fm2qze/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..68352d9791bfb3da6dda288585bcdfe8c05a819e --- /dev/null +++ b/drc11/v2fm2qze/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 332520, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1702502400/model b/drc11/v2fm2qze/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..e11a6130ba51ad11ad2c3707c262191fa832edb6 --- /dev/null +++ b/drc11/v2fm2qze/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c602f3f52e714e1b86b28e6697f2221b81d754d0b283e270e0ccc48f13b0926 +size 15803057 diff --git a/drc11/v2fm2qze/cp_1802649600/cfg.json b/drc11/v2fm2qze/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a971adc5ee4c75acc3e338516a727451d4b3289d --- /dev/null +++ b/drc11/v2fm2qze/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 352080, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1802649600/model b/drc11/v2fm2qze/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..5cda0bd0a12f95c71fb3f598db9b1f5d0f48924b --- /dev/null +++ b/drc11/v2fm2qze/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:749e314044d9947c0d20861cc5c8ed9be01b528f985f92ba5bc3b82adf00f026 +size 15803057 diff --git a/drc11/v2fm2qze/cp_1902796800/cfg.json b/drc11/v2fm2qze/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a459c125cceef77baca67c57bc5853d7d8a84327 --- /dev/null +++ b/drc11/v2fm2qze/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 371640, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_1902796800/model b/drc11/v2fm2qze/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..ef37990f1ad9a04494c78e26eeff4f2e4c4e25e8 --- /dev/null +++ b/drc11/v2fm2qze/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d5c3aa166c65193e85360be4c65fe37e347906a4a927e8af8eb3e059a12edbf +size 15803057 diff --git a/drc11/v2fm2qze/cp_2002944000/cfg.json b/drc11/v2fm2qze/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a04a56696c901d7d764b3a481a56d555f347da5e --- /dev/null +++ b/drc11/v2fm2qze/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 275245589}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 990752601, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 1, "repeats_per_step": 1, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "learner_policy_version": 391200, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc11/v2fm2qze/cp_2002944000/model b/drc11/v2fm2qze/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..64c9be4c522ac4a7007b0af063c46c84388b8b4b --- /dev/null +++ b/drc11/v2fm2qze/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b96cecaff01d0599de39a130f406d26d2cd0da5b4681b54f6f93d9703415c1 +size 15803057 diff --git a/drc33/bkynosqi/cp_0000998400/cfg.json b/drc33/bkynosqi/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c11656cb795d716e06122d6dc941b190d646fbc9 --- /dev/null +++ b/drc33/bkynosqi/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0000998400/model b/drc33/bkynosqi/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..f7f1e32877c6e8383644a372844aced70f4c631d --- /dev/null +++ b/drc33/bkynosqi/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf36d07060dfc3ab870b4e459739a5fd745167f20dca5f46d6e6b22f968eee5 +size 20566089 diff --git a/drc33/bkynosqi/cp_0002001920/cfg.json b/drc33/bkynosqi/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..98aa665e5b832ccf27282b14f1417a34e3a2cb3f --- /dev/null +++ b/drc33/bkynosqi/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0002001920/model b/drc33/bkynosqi/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..6a6a6830869d06805d1ef79e95e145461e7b7c3b --- /dev/null +++ b/drc33/bkynosqi/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e819bb7b9be8b96ee7d8810a4a908806495c273db7551d10458aee1adeeadd32 +size 20566089 diff --git a/drc33/bkynosqi/cp_0003000320/cfg.json b/drc33/bkynosqi/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b5ecd1776cde2f3fb1f3ef6c427f84331b8cbd1d --- /dev/null +++ b/drc33/bkynosqi/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0003000320/model b/drc33/bkynosqi/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..3d2f8a3f7c2765688f72f5480c3d59e9d674d9c6 --- /dev/null +++ b/drc33/bkynosqi/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c3b784c4ec7ba57bf689cdff45a40c5cdeb553a62edaedd67c5d2e769cc9eaa +size 20566089 diff --git a/drc33/bkynosqi/cp_0004003840/cfg.json b/drc33/bkynosqi/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0b9e383459a79587c79fa64bedeba729d3d212aa --- /dev/null +++ b/drc33/bkynosqi/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0004003840/model b/drc33/bkynosqi/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..11607021cb90da3ba600ab5922493118fc7c41a7 --- /dev/null +++ b/drc33/bkynosqi/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87fa07f3b10e955c340a45d927ab2b9b871cae961dfcd21929cf1e96182fb3cc +size 20566089 diff --git a/drc33/bkynosqi/cp_0005007360/cfg.json b/drc33/bkynosqi/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..505d65fc5c7703cbe86bfa3e5cbea103c79b8f5a --- /dev/null +++ b/drc33/bkynosqi/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0005007360/model b/drc33/bkynosqi/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..f4114d19563b6641c1d712089f31eaef6631f501 --- /dev/null +++ b/drc33/bkynosqi/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c42d4c7ff4e5dee5da5cf2513b46197a9a18455f9036b90492fe2f7970d3eba +size 20566089 diff --git a/drc33/bkynosqi/cp_0006005760/cfg.json b/drc33/bkynosqi/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..09c1ce164f214a073b1ebc70f43dec6d9e8ce2c2 --- /dev/null +++ b/drc33/bkynosqi/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0006005760/model b/drc33/bkynosqi/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..e50a3eeebf887b001e95f8eb110ce0392c333802 --- /dev/null +++ b/drc33/bkynosqi/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0691db8623223c629c0321dfb5b519874c2829fefbfe09beacf5e48cfaaeb3a +size 20566089 diff --git a/drc33/bkynosqi/cp_0007009280/cfg.json b/drc33/bkynosqi/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..09f2dd4e6e02334689849ba9aab2c3694ca7f56e --- /dev/null +++ b/drc33/bkynosqi/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0007009280/model b/drc33/bkynosqi/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..f171427e8acc060a0e39d05abc0fa27803c97be9 --- /dev/null +++ b/drc33/bkynosqi/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aced405e3c7ffcbed14923431cdfb3effd71348dc0e88fd0232935db81046d6 +size 20566089 diff --git a/drc33/bkynosqi/cp_0008007680/cfg.json b/drc33/bkynosqi/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3ef12ace0ecb6d1c9aebeec1f8db3566158f5ce6 --- /dev/null +++ b/drc33/bkynosqi/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0008007680/model b/drc33/bkynosqi/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..a29bf3912c2ffbd32dfdc2dbbdff687e503afa20 --- /dev/null +++ b/drc33/bkynosqi/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80c08a30d1e124121e777b7939a495f95c5f2cc053e62cef39371d6b508d73c7 +size 20566089 diff --git a/drc33/bkynosqi/cp_0009011200/cfg.json b/drc33/bkynosqi/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1f2678ac915597256b6ab9a920d086c893f97316 --- /dev/null +++ b/drc33/bkynosqi/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0009011200/model b/drc33/bkynosqi/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..4d1093225f231b64f3b59263b0c4286aac102bf4 --- /dev/null +++ b/drc33/bkynosqi/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:163a1c9b50da82d2d6a4f5c1fb5f0ed05f816eb81273be4f1c3be8f6ca03f89a +size 20566089 diff --git a/drc33/bkynosqi/cp_0010014720/cfg.json b/drc33/bkynosqi/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..32bb53c8f3c96446b6340a4660b540a9502a764e --- /dev/null +++ b/drc33/bkynosqi/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0010014720/model b/drc33/bkynosqi/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..4fb3a8b7b205c412930b5faabffdcca1eda09e71 --- /dev/null +++ b/drc33/bkynosqi/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ea06e78c4867dfec91ee139e1ba4a957b30d7ee8da34215313385a477a8b76 +size 20566089 diff --git a/drc33/bkynosqi/cp_0011013120/cfg.json b/drc33/bkynosqi/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6b6ff9d89ec7f1822cd63fc2e48e4d4caef50f49 --- /dev/null +++ b/drc33/bkynosqi/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0011013120/model b/drc33/bkynosqi/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..97a580a300ca3fc2e057691efe5d85acc894afb5 --- /dev/null +++ b/drc33/bkynosqi/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9671845ef485d4173e2718f3a571d7502d345e72186079fe5c3a8c31847dac27 +size 20566089 diff --git a/drc33/bkynosqi/cp_0012016640/cfg.json b/drc33/bkynosqi/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e049910f0c74faa3ba92c04551085c4c74a099d5 --- /dev/null +++ b/drc33/bkynosqi/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0012016640/model b/drc33/bkynosqi/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..0e501531fb51716b875b69d6b43d91eb0087defe --- /dev/null +++ b/drc33/bkynosqi/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ee2e590ef9b3aa0f4e41b84cc133faa88614c7aa25b351efcf99e2d5d8f4bd +size 20566089 diff --git a/drc33/bkynosqi/cp_0013015040/cfg.json b/drc33/bkynosqi/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1002964d05398e6082a8c528703eef4face931d5 --- /dev/null +++ b/drc33/bkynosqi/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0013015040/model b/drc33/bkynosqi/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..3e256ca58080668d812b7ea24d8a86ae9ba5b94e --- /dev/null +++ b/drc33/bkynosqi/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d8ff53bb3d476add4c64ce1b9bfb23760f0809ddc7109d2f64eb4181a5b5a19 +size 20566089 diff --git a/drc33/bkynosqi/cp_0014018560/cfg.json b/drc33/bkynosqi/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..74c67e84dbfeca0d4c900add83fb3bea4b37b15f --- /dev/null +++ b/drc33/bkynosqi/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0014018560/model b/drc33/bkynosqi/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..6416923f533619b8d5c43d0e57148953e1ebe592 --- /dev/null +++ b/drc33/bkynosqi/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84d5172ee71431513ec9bedaa1263bf2cd90a97f1b05fe2d571f17316073c869 +size 20566089 diff --git a/drc33/bkynosqi/cp_0015022080/cfg.json b/drc33/bkynosqi/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9dc1d5524c0178138eb1a65bade7f5e210188754 --- /dev/null +++ b/drc33/bkynosqi/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0015022080/model b/drc33/bkynosqi/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..60a264e9a670e246caa73f4ab68832233128bdb2 --- /dev/null +++ b/drc33/bkynosqi/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8bc3226e518c13303d9609bee79cce0403de0f263710d3ca5af0cc887cefa73 +size 20566089 diff --git a/drc33/bkynosqi/cp_0016020480/cfg.json b/drc33/bkynosqi/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cc57418958fb17c4fe423f8904fc13e6e265166c --- /dev/null +++ b/drc33/bkynosqi/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0016020480/model b/drc33/bkynosqi/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..16a355d65efabce28e3cf38bbf83a61af41dfb6f --- /dev/null +++ b/drc33/bkynosqi/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26bd0c9e494c13d3350d3339dd429e3791730ce980e719d8fd6cb010db9ddbb2 +size 20566089 diff --git a/drc33/bkynosqi/cp_0017024000/cfg.json b/drc33/bkynosqi/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5ea25f585169279bdd1d1a4d1ab33f47953a6596 --- /dev/null +++ b/drc33/bkynosqi/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0017024000/model b/drc33/bkynosqi/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..df5f11c5a745d0c917b59ab5754f2be7f11cbad2 --- /dev/null +++ b/drc33/bkynosqi/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba5d766187fdc1c569245792b0e139350611a2d7e02be2843c93a926acdd69c +size 20566089 diff --git a/drc33/bkynosqi/cp_0018022400/cfg.json b/drc33/bkynosqi/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3724d1b1c6a9488f76c91209679c19016285494e --- /dev/null +++ b/drc33/bkynosqi/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0018022400/model b/drc33/bkynosqi/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..91054fa615764758929bcd9ac4cc83728873e623 --- /dev/null +++ b/drc33/bkynosqi/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8093441df999926680652bfc2502c26869a4f776a2a0a0956e71260f81592a16 +size 20566089 diff --git a/drc33/bkynosqi/cp_0019025920/cfg.json b/drc33/bkynosqi/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e35f80c55abade44b369d178783cd90dd900b4b1 --- /dev/null +++ b/drc33/bkynosqi/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0019025920/model b/drc33/bkynosqi/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..0ceee0c5c55c94271f24d488e26ed33eccb645d7 --- /dev/null +++ b/drc33/bkynosqi/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80b347399a1c76220a88ac2cb0c1b6a9a39c2cd2a64422f7b62515e4b18a7b6 +size 20566089 diff --git a/drc33/bkynosqi/cp_0020029440/cfg.json b/drc33/bkynosqi/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c0018751bc32d21d3788f86bbb20ccdb4c9009f4 --- /dev/null +++ b/drc33/bkynosqi/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0020029440/model b/drc33/bkynosqi/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..9f1f7cf9ca9b653d0b6bc1aa4031f67993681525 --- /dev/null +++ b/drc33/bkynosqi/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c35e031586eaf5c27ac9130f8a8f75c15e34ba0a61fabf4e17a79012be53a8 +size 20566089 diff --git a/drc33/bkynosqi/cp_0030044160/cfg.json b/drc33/bkynosqi/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a2949eb99603ceb7218e5d8b4a6f771d756752df --- /dev/null +++ b/drc33/bkynosqi/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0030044160/model b/drc33/bkynosqi/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..fc70bc455b243be5c730f5fa082221a8a396ad79 --- /dev/null +++ b/drc33/bkynosqi/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b14d929186a68d514ef7b5d07f0a49f96ed9d3eea146b590c2225a425e90d66d +size 20566089 diff --git a/drc33/bkynosqi/cp_0040058880/cfg.json b/drc33/bkynosqi/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f69c284768dfb6b135c2d5832f2a75c7170b819c --- /dev/null +++ b/drc33/bkynosqi/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0040058880/model b/drc33/bkynosqi/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..e4800b91dff6cc4e9bff0f1f01fca408359544a5 --- /dev/null +++ b/drc33/bkynosqi/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0624a5bd6c8d0a5aac4b21feb1e776a920779b8980c5d39dde9bd135ee730e28 +size 20566089 diff --git a/drc33/bkynosqi/cp_0050073600/cfg.json b/drc33/bkynosqi/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d3d0880dbbf9e764a126801746cb55b13d8074e2 --- /dev/null +++ b/drc33/bkynosqi/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0050073600/model b/drc33/bkynosqi/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..4d09249112d5fa260b4ef98d2a22e5009510b97f --- /dev/null +++ b/drc33/bkynosqi/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399d60b4694d0ca7f12bfe00c5185c05d6977bddad0ce435a6603d2dd16f2124 +size 20566089 diff --git a/drc33/bkynosqi/cp_0060088320/cfg.json b/drc33/bkynosqi/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dbe40a2911caf7335f7b1faafb47d18d80b64495 --- /dev/null +++ b/drc33/bkynosqi/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0060088320/model b/drc33/bkynosqi/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..5cf8bda793f6afda835cd6e8b2bbcaa3207111f1 --- /dev/null +++ b/drc33/bkynosqi/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15c7a2b1936605412849ff6a1248a15cdae11bb7b022608681b4a17e2890ce1 +size 20566089 diff --git a/drc33/bkynosqi/cp_0070103040/cfg.json b/drc33/bkynosqi/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2dbb9c3c0d2a533f4d61398ede6b4cb7c59350df --- /dev/null +++ b/drc33/bkynosqi/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0070103040/model b/drc33/bkynosqi/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..98448a7ec50ac5be4bf3343ae64a19236e88bf35 --- /dev/null +++ b/drc33/bkynosqi/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:650424397f39f1692d677735f64b77fa592b7684ecdbf7e84a9a3b4ab15aa63a +size 20566089 diff --git a/drc33/bkynosqi/cp_0080117760/cfg.json b/drc33/bkynosqi/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1e3c462bbde983830a573c0fad851b9ea7b76060 --- /dev/null +++ b/drc33/bkynosqi/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0080117760/model b/drc33/bkynosqi/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..f9b65ea9511f35503760d93cdfcce9b1f7f9e4cf --- /dev/null +++ b/drc33/bkynosqi/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c8d548264ca73da886356d6c93db01687ab054e281764d010eadc241362a8f0 +size 20566089 diff --git a/drc33/bkynosqi/cp_0090132480/cfg.json b/drc33/bkynosqi/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..565d854a34a030e2ec1161735a64c7eb801e9c48 --- /dev/null +++ b/drc33/bkynosqi/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0090132480/model b/drc33/bkynosqi/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..4b7c886340234d72f9efcd2f5852fb53ae5da638 --- /dev/null +++ b/drc33/bkynosqi/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce8fac4e8af2f1c88520aab750729ed5171d3408f9e3effc9239833e544b114 +size 20566089 diff --git a/drc33/bkynosqi/cp_0100147200/cfg.json b/drc33/bkynosqi/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..64396c50f60c0475b2979f3120aba99cc8577fb4 --- /dev/null +++ b/drc33/bkynosqi/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0100147200/model b/drc33/bkynosqi/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..33d71e82463b0db3fa576dd3a62cb17ad09071be --- /dev/null +++ b/drc33/bkynosqi/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c96052d089a931003f22bcfae50585455654b4129e29b5fbc580f7b460ade768 +size 20566089 diff --git a/drc33/bkynosqi/cp_0110161920/cfg.json b/drc33/bkynosqi/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ae52d12a6f797af9a6128fcb9650667722044e24 --- /dev/null +++ b/drc33/bkynosqi/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0110161920/model b/drc33/bkynosqi/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..b8bbde18b5243eaed8da79e358efbc9570b3192f --- /dev/null +++ b/drc33/bkynosqi/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c5230f1bce4a455ae3051f0f2c5b011c66a5960989ffb0a0de38eb564f5998 +size 20566089 diff --git a/drc33/bkynosqi/cp_0120176640/cfg.json b/drc33/bkynosqi/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1dfcd9a2e5145a7ac25f0c9c66d51aeb29a26270 --- /dev/null +++ b/drc33/bkynosqi/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0120176640/model b/drc33/bkynosqi/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..05d25891fa1c2abbab14c23df49a42cd51a04e7c --- /dev/null +++ b/drc33/bkynosqi/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0af3dbb164d0be4b21aa0e48a5819f7c5b549389e6e0059ad3c9f5d827cae09 +size 20566089 diff --git a/drc33/bkynosqi/cp_0130191360/cfg.json b/drc33/bkynosqi/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8f482a0d2fbc31c0871321b1148f94563a965a28 --- /dev/null +++ b/drc33/bkynosqi/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0130191360/model b/drc33/bkynosqi/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..020a2885026a0cf7e8ee4af8800f9b8b46e7ea8a --- /dev/null +++ b/drc33/bkynosqi/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15859cd9203d630e7fd5b8f7ec889a060c2b8f3bf8dc79a466d51f0699c92926 +size 20566089 diff --git a/drc33/bkynosqi/cp_0140206080/cfg.json b/drc33/bkynosqi/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77072dedb18406e29fbda6076cc2e716fdb163f3 --- /dev/null +++ b/drc33/bkynosqi/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0140206080/model b/drc33/bkynosqi/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..32cb7ecfefe619fd088a2b75b89aed063eefdaea --- /dev/null +++ b/drc33/bkynosqi/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6dbe867c6acd65b65440cb57ca66cf5ff881e1e76c31b246eec79f35e18eda +size 20566089 diff --git a/drc33/bkynosqi/cp_0150220800/cfg.json b/drc33/bkynosqi/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..15b4c3e9740d7755857f01bf2ffc563b8484c7d6 --- /dev/null +++ b/drc33/bkynosqi/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0150220800/model b/drc33/bkynosqi/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..50a0d2fda16a5469c070bbdf14087c4980832a73 --- /dev/null +++ b/drc33/bkynosqi/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bca40627d02891de923707005488af0cd853ea75fd02b64ec290a388e55b039 +size 20566089 diff --git a/drc33/bkynosqi/cp_0160235520/cfg.json b/drc33/bkynosqi/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..67977336fc48a909315ba67a3ef1e6a375653f9f --- /dev/null +++ b/drc33/bkynosqi/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0160235520/model b/drc33/bkynosqi/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..ae0f9531a03201d909df7284a4d78c07b05822ee --- /dev/null +++ b/drc33/bkynosqi/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d535eaab18c4c778cd2b6e313ee17d51641da1cc36e57edd867cee076b1e27f1 +size 20566089 diff --git a/drc33/bkynosqi/cp_0170250240/cfg.json b/drc33/bkynosqi/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..def90591e73a3941d88bac81ed08021e67997126 --- /dev/null +++ b/drc33/bkynosqi/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0170250240/model b/drc33/bkynosqi/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..2a7bcbfc40e832069c545b82e5fd415262438582 --- /dev/null +++ b/drc33/bkynosqi/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d423d2df81c61eb76914aca6a59464efa004e67b0547de753cdb2d904aae96b +size 20566089 diff --git a/drc33/bkynosqi/cp_0180264960/cfg.json b/drc33/bkynosqi/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f1e863751fe043eff1e6b40a4e50ef9a1a9bc9ca --- /dev/null +++ b/drc33/bkynosqi/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0180264960/model b/drc33/bkynosqi/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..03095a128b5f86436ce9016b0a93d0b2c2524c7c --- /dev/null +++ b/drc33/bkynosqi/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137d5b07fb14d69d9756b072a3a1adfa2c182f5f6538d579172c7c88e6511bdf +size 20566089 diff --git a/drc33/bkynosqi/cp_0190279680/cfg.json b/drc33/bkynosqi/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2d3b13b716eb7750d27253eccb6afda7b822ab81 --- /dev/null +++ b/drc33/bkynosqi/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0190279680/model b/drc33/bkynosqi/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..c252ba59545edb298d6c743c6759deaec910de05 --- /dev/null +++ b/drc33/bkynosqi/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa564127f7ec70171b0976c85d64901cddf77efcfc512665846578e310f870f +size 20566089 diff --git a/drc33/bkynosqi/cp_0200294400/cfg.json b/drc33/bkynosqi/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..566d0ed24a656dc3dbe4fa1cbce9579f840a9b8e --- /dev/null +++ b/drc33/bkynosqi/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0200294400/model b/drc33/bkynosqi/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..281ce6e7d17b13c32f9f8f46394f6937372bef4b --- /dev/null +++ b/drc33/bkynosqi/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f887c1570f1b23435a70a62e2124cb41a160a50024ff405096965a029b2d8c2 +size 20566089 diff --git a/drc33/bkynosqi/cp_0300441600/cfg.json b/drc33/bkynosqi/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..575dbdf6dfb0ebb1b7a78797ce5e42effc573332 --- /dev/null +++ b/drc33/bkynosqi/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0300441600/model b/drc33/bkynosqi/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..88730abad7e924cd334fcec65d22e7eba9411d6a --- /dev/null +++ b/drc33/bkynosqi/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d43df987005d75d5eb076958734aac3321e87d70a2578b6ddf96862909ac8ef7 +size 20566089 diff --git a/drc33/bkynosqi/cp_0400588800/cfg.json b/drc33/bkynosqi/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..376d2891f1f02e080d4189a01c0d129df4f6daff --- /dev/null +++ b/drc33/bkynosqi/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0400588800/model b/drc33/bkynosqi/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..3d69b36e94b7fd71be473dab904ceb6f1466da53 --- /dev/null +++ b/drc33/bkynosqi/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30aed233edffdcb976107c95adb9ffe409a949ea57769a7a74c3fb96fd286ab8 +size 20566089 diff --git a/drc33/bkynosqi/cp_0500736000/cfg.json b/drc33/bkynosqi/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ec281aac75b20792066c1dd20acde7ff96d1e33a --- /dev/null +++ b/drc33/bkynosqi/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0500736000/model b/drc33/bkynosqi/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..2e21be8b55b39380462cd2029c7b4e6f0b6e0ae2 --- /dev/null +++ b/drc33/bkynosqi/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aae1ebe78262e9d5ee906c60e97c9854446bdf9a093ae511af6c1aa02b4b5dd1 +size 20566089 diff --git a/drc33/bkynosqi/cp_0600883200/cfg.json b/drc33/bkynosqi/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d11dfa0bb9db2d6773240aa89d8b1ffaee9199dc --- /dev/null +++ b/drc33/bkynosqi/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0600883200/model b/drc33/bkynosqi/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..665678fe89849c268bdaee3b0f503c4445962e65 --- /dev/null +++ b/drc33/bkynosqi/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b2df973db77fcca1cb2ea6f369402ce97fba5815c9f92bc62ad2c09e307285 +size 20566089 diff --git a/drc33/bkynosqi/cp_0701030400/cfg.json b/drc33/bkynosqi/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0702269b213d3c6745b104833a6b5600547aa1a2 --- /dev/null +++ b/drc33/bkynosqi/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0701030400/model b/drc33/bkynosqi/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..e058c0dcdd2d43818f16c47bb00554b6d43295c7 --- /dev/null +++ b/drc33/bkynosqi/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f4514dbcbce845e27fddddc0fccdc6d353a75e61e6ebd79ddda537c8a21d82 +size 20566089 diff --git a/drc33/bkynosqi/cp_0801177600/cfg.json b/drc33/bkynosqi/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f2956bfabf2ac4ad6be6a5c189ea4438d865db38 --- /dev/null +++ b/drc33/bkynosqi/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0801177600/model b/drc33/bkynosqi/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..7ca3240484f20fd79ee2ad83329fc418eb58d8db --- /dev/null +++ b/drc33/bkynosqi/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42ccf4875d76f9b2666fa54ff4a113b24f2d0c7d04372094f8ac27bff906926 +size 20566089 diff --git a/drc33/bkynosqi/cp_0901324800/cfg.json b/drc33/bkynosqi/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d7a3c6a4ae2b8ce317cab4da4959bba51581d6ea --- /dev/null +++ b/drc33/bkynosqi/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_0901324800/model b/drc33/bkynosqi/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..07e161192078feb0e8525586e09d1386db527b23 --- /dev/null +++ b/drc33/bkynosqi/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:117aef07cf1dd701cfbfbf30b6b40c379fa75dcd74e1ae7459b2a0999004209c +size 20566089 diff --git a/drc33/bkynosqi/cp_1001472000/cfg.json b/drc33/bkynosqi/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..72f5917c5faa8c9fd41890e731fd8302dcddb776 --- /dev/null +++ b/drc33/bkynosqi/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1001472000/model b/drc33/bkynosqi/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..e5d68334a828ab697113e98cf8d3f441e82a3bfb --- /dev/null +++ b/drc33/bkynosqi/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b24d949968c231c81e2821e4e049ed28b2df6c4352a261cf5b8e58452de779 +size 20566089 diff --git a/drc33/bkynosqi/cp_1101619200/cfg.json b/drc33/bkynosqi/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b78a197964fc3aebd23f46bc8c5aa18df95e1dd5 --- /dev/null +++ b/drc33/bkynosqi/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1101619200/model b/drc33/bkynosqi/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..906505f9fbcb62c20cfa73e3a8ed1ed6248763c7 --- /dev/null +++ b/drc33/bkynosqi/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9d7da96504107d7ad50de9461a09f07a705bf1b0325b89d00ba745fe2d0f651 +size 20566089 diff --git a/drc33/bkynosqi/cp_1201766400/cfg.json b/drc33/bkynosqi/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0f3845d4a806a254f5801f1f1229077ec1fc6eff --- /dev/null +++ b/drc33/bkynosqi/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1201766400/model b/drc33/bkynosqi/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..af36b3d4fb9072304b91e8df697ccffd45aa3087 --- /dev/null +++ b/drc33/bkynosqi/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bfc6086e45ab4061aa7d5fd141c892f7f7e52541b15ade5752f4162bc7b5d3f +size 20566089 diff --git a/drc33/bkynosqi/cp_1301913600/cfg.json b/drc33/bkynosqi/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..778accfe18f09ad212f5bfa99dd8a309dc9064f1 --- /dev/null +++ b/drc33/bkynosqi/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1301913600/model b/drc33/bkynosqi/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..bc54dd4968ba40348bf75019bcd6644df73ecfeb --- /dev/null +++ b/drc33/bkynosqi/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a32ff885728f004e1f9861af551572ca08ef2afb1b8ffb440c63be414154a2df +size 20566089 diff --git a/drc33/bkynosqi/cp_1402060800/cfg.json b/drc33/bkynosqi/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..eeedc5b218b64043f94ae19f3d233e2254b7a0bc --- /dev/null +++ b/drc33/bkynosqi/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1402060800/model b/drc33/bkynosqi/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..f042e55fcac993bf76eb037ddf4ae64419ac7de6 --- /dev/null +++ b/drc33/bkynosqi/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:299280f0f033006d331d1a64f46be4f6530fb0f256540057a8f349f9e877554c +size 20566089 diff --git a/drc33/bkynosqi/cp_1502208000/cfg.json b/drc33/bkynosqi/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..69baa3375adb93f2afef38bf46831c7a75c86dc0 --- /dev/null +++ b/drc33/bkynosqi/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1502208000/model b/drc33/bkynosqi/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..0132abbf4a88176a6eba76aaab203d22fb3c1257 --- /dev/null +++ b/drc33/bkynosqi/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7705ef3ff2e10fb84bd206022bfcaad04996657a318b971218b89f51dbfaa5 +size 20566089 diff --git a/drc33/bkynosqi/cp_1602355200/cfg.json b/drc33/bkynosqi/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ca4a0a151d095fdd4be733fa32e5772dff9cddb4 --- /dev/null +++ b/drc33/bkynosqi/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1602355200/model b/drc33/bkynosqi/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..324996ca4080e0754163b0692feb68ced26bc45b --- /dev/null +++ b/drc33/bkynosqi/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf72dd653ec8f81fc2e2333ecee48844e193d98ee46e90df33e5b352ec98df82 +size 20566089 diff --git a/drc33/bkynosqi/cp_1702502400/cfg.json b/drc33/bkynosqi/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..224f846edc2d9e993c65233dc5b0d641c564182f --- /dev/null +++ b/drc33/bkynosqi/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1702502400/model b/drc33/bkynosqi/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..92cd461749a3babd84fd164b3e0ccf8697843d45 --- /dev/null +++ b/drc33/bkynosqi/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ea33da52842a7a36bfef4e8a04c9ba3e8197699cc326e5dc7dd563867f8e48 +size 20566089 diff --git a/drc33/bkynosqi/cp_1802649600/cfg.json b/drc33/bkynosqi/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..16aaca5bc86922323e8669cd7346a6ac30b36b01 --- /dev/null +++ b/drc33/bkynosqi/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1802649600/model b/drc33/bkynosqi/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..da0324845b1506a61e9ac86a68916bb71f021963 --- /dev/null +++ b/drc33/bkynosqi/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb4c3eea5f3c58ca5d6c8de2be4a9aad2d7d38a7c3f2fc41d36db624710e3d4 +size 20566089 diff --git a/drc33/bkynosqi/cp_1902796800/cfg.json b/drc33/bkynosqi/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7c5158db8bcc88bc508bd89a54b4dcb12f656457 --- /dev/null +++ b/drc33/bkynosqi/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_1902796800/model b/drc33/bkynosqi/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..647ce80d7aba4b64c4c1c1defe3b9f82d2aa4ee7 --- /dev/null +++ b/drc33/bkynosqi/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b92d3816961eea276cff764bc67c05dc079a95a630df3703a2f298b51df770 +size 20566089 diff --git a/drc33/bkynosqi/cp_2002944000/cfg.json b/drc33/bkynosqi/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..de591e3394d9602e90f65d61e30d23fefbd32f5f --- /dev/null +++ b/drc33/bkynosqi/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc33/bkynosqi/cp_2002944000/model b/drc33/bkynosqi/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..c6cd6b3ac04c4f1d1d663f1d2f1589241fca0ca3 --- /dev/null +++ b/drc33/bkynosqi/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:779e7e03cbffce1b16cc7f75dc91882f31c0219c84128469a020303796a89ddd +size 20566089 diff --git a/drc33/gobfm3wm/cp_0000998400/cfg.json b/drc33/gobfm3wm/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9a527b356c666070f99c6b8ee6aec1456904a35f --- /dev/null +++ b/drc33/gobfm3wm/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0000998400/model b/drc33/gobfm3wm/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..222d1c8c89f88730e1b6b76539b3c5dfe1cefe77 --- /dev/null +++ b/drc33/gobfm3wm/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e53183d777659eddf0351d8d2fe652fab10f273d49e84edfd052cebaa7e8a688 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0002001920/cfg.json b/drc33/gobfm3wm/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..92719d03dd3647cca5d4a224a05648199a0cc399 --- /dev/null +++ b/drc33/gobfm3wm/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0002001920/model b/drc33/gobfm3wm/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..78f6996f58bbe68613aa5bbc20c4f3e11d46dc3b --- /dev/null +++ b/drc33/gobfm3wm/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a188c2a4947469ec2abeda67db5966bafbf8c48f1bb586e9a767ef2a47568d +size 20566089 diff --git a/drc33/gobfm3wm/cp_0003000320/cfg.json b/drc33/gobfm3wm/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..51930630db5b69b7e0980570d5aa4199d1c9e142 --- /dev/null +++ b/drc33/gobfm3wm/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0003000320/model b/drc33/gobfm3wm/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..bb55e6b7be00d31020fd74f821854e740f4f20e1 --- /dev/null +++ b/drc33/gobfm3wm/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3825a9bd37897ef1dbfab41b3685d4f1e11d95c2065caa1769da53221c47e3 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0004003840/cfg.json b/drc33/gobfm3wm/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..624bfbf8f6e2c0c9e287e6682435ea77247f411c --- /dev/null +++ b/drc33/gobfm3wm/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0004003840/model b/drc33/gobfm3wm/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..6d9cbc635490ef18a9b2d0e05fa970e14b12a27c --- /dev/null +++ b/drc33/gobfm3wm/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3dbc733e744c1fdf5243f926187a1c6a264183c33fbc66ed788db9f6797a44b +size 20566089 diff --git a/drc33/gobfm3wm/cp_0005007360/cfg.json b/drc33/gobfm3wm/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bb3935689b540448141e61b2612d2c22ad87eb4c --- /dev/null +++ b/drc33/gobfm3wm/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0005007360/model b/drc33/gobfm3wm/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..01409f4775fa101a05f89582b0ef6f66358e70c9 --- /dev/null +++ b/drc33/gobfm3wm/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89344bf11af934c65ea835c38f266ac1adcea7c8a610af0ea7ff2b7408ae3c90 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0006005760/cfg.json b/drc33/gobfm3wm/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4f78425d795c25ca1ac02589cdebe8265694aaac --- /dev/null +++ b/drc33/gobfm3wm/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0006005760/model b/drc33/gobfm3wm/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..f58ad3e2fc2bde790eb574753e05582060246355 --- /dev/null +++ b/drc33/gobfm3wm/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9be86da7be0dd90ad33dd005499bd7d21e447dc489068dbfcb26fcd4080d5531 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0007009280/cfg.json b/drc33/gobfm3wm/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3ee8484648749ab2a9ebf3a17c994783dd73cb --- /dev/null +++ b/drc33/gobfm3wm/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0007009280/model b/drc33/gobfm3wm/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..71a7f9a34ed4c7190c60bc57719974cd388a68f9 --- /dev/null +++ b/drc33/gobfm3wm/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aba2a78b42d3f7d1a84fe5c9eae9ab2355428978d9aaf0997209cd515f62567d +size 20566089 diff --git a/drc33/gobfm3wm/cp_0008007680/cfg.json b/drc33/gobfm3wm/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4282a4010b48e00da5e7015187c1ee04173cd9a8 --- /dev/null +++ b/drc33/gobfm3wm/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0008007680/model b/drc33/gobfm3wm/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..33eb284e2b49fd16ff7bfb75d8673959c7a57cc9 --- /dev/null +++ b/drc33/gobfm3wm/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69441417d6ac475b549b171cd3a7c40b334a6ce7d87e2860309b4af45f3bf83d +size 20566089 diff --git a/drc33/gobfm3wm/cp_0009011200/cfg.json b/drc33/gobfm3wm/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..935abccedb7ba22342ff04dd2d1181b2a235b701 --- /dev/null +++ b/drc33/gobfm3wm/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0009011200/model b/drc33/gobfm3wm/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..cec63445217ef15da3d66d0bf4bd8cfa3201ad46 --- /dev/null +++ b/drc33/gobfm3wm/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d66b4104ca45c298120fdbf12ea61172215cdba00cfcc88608a755678e80cc26 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0010014720/cfg.json b/drc33/gobfm3wm/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c6a0908a0abf32a92400ec15a6716d15f0601b7c --- /dev/null +++ b/drc33/gobfm3wm/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0010014720/model b/drc33/gobfm3wm/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..2ece2801c7677ea2c122c0cebd8ba0dda912ec6c --- /dev/null +++ b/drc33/gobfm3wm/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b56437704865102506c66a713781a503ded8932a1b1bb7b42c028ff261d84b4d +size 20566089 diff --git a/drc33/gobfm3wm/cp_0011013120/cfg.json b/drc33/gobfm3wm/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3f0f95374f2956182bc33dbf1e830d0a6937cc --- /dev/null +++ b/drc33/gobfm3wm/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0011013120/model b/drc33/gobfm3wm/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..6f8ddbf01acc6fb2593a462f4f28e2018c6c23d3 --- /dev/null +++ b/drc33/gobfm3wm/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed9cf673dc32e8ffbbc1edd0a757756718897b6d21e4cbdf6694ede3fd80b3c +size 20566089 diff --git a/drc33/gobfm3wm/cp_0012016640/cfg.json b/drc33/gobfm3wm/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..685c70aa6c5d8d1287bdabb254c3e6ed663cd5a1 --- /dev/null +++ b/drc33/gobfm3wm/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0012016640/model b/drc33/gobfm3wm/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..463314c198b3e8513641d6db37debab6e8b0f9e3 --- /dev/null +++ b/drc33/gobfm3wm/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ec823e36d7abf82b7cdf82196f1227a76e8402e56b9f1cb745f7023a08c993a +size 20566089 diff --git a/drc33/gobfm3wm/cp_0013015040/cfg.json b/drc33/gobfm3wm/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6f33e630a60c77e56d20fb32cb81475fdc2f4119 --- /dev/null +++ b/drc33/gobfm3wm/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0013015040/model b/drc33/gobfm3wm/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..0135b545381c731eb47e05b7be3a8c3b8465d1a1 --- /dev/null +++ b/drc33/gobfm3wm/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18830b52a609707c96e0d63fd81f0635cdcdb56bb4b2d04dd00736949ccb9001 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0014018560/cfg.json b/drc33/gobfm3wm/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bb414f857f0597295991f2303e41af39281bf8db --- /dev/null +++ b/drc33/gobfm3wm/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0014018560/model b/drc33/gobfm3wm/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..89090691a492ca9403999301315e8dab509093b3 --- /dev/null +++ b/drc33/gobfm3wm/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:176fd1001f62a7ef375718173b3383ec09aa4e07684bf59ff91519806246868c +size 20566089 diff --git a/drc33/gobfm3wm/cp_0015022080/cfg.json b/drc33/gobfm3wm/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8aa4e2612db9b5e02c77960f0fadfca5081a2537 --- /dev/null +++ b/drc33/gobfm3wm/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0015022080/model b/drc33/gobfm3wm/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..c5425410274090feccc4b31bda18ccf7df9c6cf1 --- /dev/null +++ b/drc33/gobfm3wm/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d86f678314266a9940173d6443150555ad37676b7bc5623364c9b1794bf97b +size 20566089 diff --git a/drc33/gobfm3wm/cp_0016020480/cfg.json b/drc33/gobfm3wm/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b890da61ca0ff91ee3cbbb0100301ad377ed0f84 --- /dev/null +++ b/drc33/gobfm3wm/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0016020480/model b/drc33/gobfm3wm/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..7489cf287957353c3d29ad03f7000f14c0518130 --- /dev/null +++ b/drc33/gobfm3wm/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bad86ded1bd995e84254bd2459ebbcbff0c7bce065b441c8d8129c3e2504415 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0017024000/cfg.json b/drc33/gobfm3wm/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..668f67d2575f6c31e8fb039ccaf8876a7d7dce84 --- /dev/null +++ b/drc33/gobfm3wm/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0017024000/model b/drc33/gobfm3wm/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..41539842a77211f6b3def4b66643744967c66a9d --- /dev/null +++ b/drc33/gobfm3wm/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8770f0546a35444d22e2950b939967406de4549c1f0e21511c81710a17e52111 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0018022400/cfg.json b/drc33/gobfm3wm/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4f195ccef898649f8b0a2cd472fa4ccfa54f05a3 --- /dev/null +++ b/drc33/gobfm3wm/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0018022400/model b/drc33/gobfm3wm/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..817c6f94260306e3208e071975fa7dc0d6a9996c --- /dev/null +++ b/drc33/gobfm3wm/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96fd5d0d3e8cdb4c47a9ee92bd965b0292b57cc1f16e5b42ec97d5efa0c80249 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0019025920/cfg.json b/drc33/gobfm3wm/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a2981ba95adfaa22ad012b3136889103d18fb2f0 --- /dev/null +++ b/drc33/gobfm3wm/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0019025920/model b/drc33/gobfm3wm/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..dd2dd002a1fcc2b8b47277138875492f79226f58 --- /dev/null +++ b/drc33/gobfm3wm/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72514aa45392a085ea5d9bd33e5b1c547eb750e0ec152e2668f1e4267649ae3 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0020029440/cfg.json b/drc33/gobfm3wm/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..817db8155a5c6796a3b5420f0d836546b5510c16 --- /dev/null +++ b/drc33/gobfm3wm/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0020029440/model b/drc33/gobfm3wm/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..721492a982476098df081195a66d8b1334a2b144 --- /dev/null +++ b/drc33/gobfm3wm/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28100d3622f3f385dc0586f9cbd97761c480689567695003359158d59047436f +size 20566089 diff --git a/drc33/gobfm3wm/cp_0030044160/cfg.json b/drc33/gobfm3wm/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6aaac297c3fcc206231b81e7adf508569dd101d7 --- /dev/null +++ b/drc33/gobfm3wm/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0030044160/model b/drc33/gobfm3wm/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..d72142fd635aaa062a298671ce4138428ca9f64b --- /dev/null +++ b/drc33/gobfm3wm/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39d8b1398750b4d8f8d53a79fab29e6cec22bf7feb50d961d98a3ee6986f4a1a +size 20566089 diff --git a/drc33/gobfm3wm/cp_0040058880/cfg.json b/drc33/gobfm3wm/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..64e3485e3b47539c3836041c2de8f4591b083c74 --- /dev/null +++ b/drc33/gobfm3wm/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0040058880/model b/drc33/gobfm3wm/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..cbd88964df8ce83122375f903bda6de58ccee72f --- /dev/null +++ b/drc33/gobfm3wm/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb19df0fe8de931793fbb0467ccab7b1f1e628f018675edfb5a81edcea49117 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0050073600/cfg.json b/drc33/gobfm3wm/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c7df811beae0951398f5003fbd248d4fc79d12a --- /dev/null +++ b/drc33/gobfm3wm/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0050073600/model b/drc33/gobfm3wm/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..dcaa28948588f7ac8b8f082958b483d6b5e4d2af --- /dev/null +++ b/drc33/gobfm3wm/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150b15d05ce7143050e69aefff73ef45db795e570b43ec3f2ca638f1b6c37bbd +size 20566089 diff --git a/drc33/gobfm3wm/cp_0060088320/cfg.json b/drc33/gobfm3wm/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bcde7fcd11776348d80882a0937936f0d91c4127 --- /dev/null +++ b/drc33/gobfm3wm/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0060088320/model b/drc33/gobfm3wm/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..a49a19791905574b8bc7b3ccf5402aa937f4d153 --- /dev/null +++ b/drc33/gobfm3wm/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e38d5ec8e58905bf3e1fadd4df66d843cda3affa025ee4316ad8c0a7c26cef +size 20566089 diff --git a/drc33/gobfm3wm/cp_0070103040/cfg.json b/drc33/gobfm3wm/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..07e1914ff0d5fb87e74c4a351c1506d35d50256f --- /dev/null +++ b/drc33/gobfm3wm/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0070103040/model b/drc33/gobfm3wm/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..505324dc5be3c233c7b2e300267d6f9ecb8dd5ec --- /dev/null +++ b/drc33/gobfm3wm/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71196737270bd6b56d216bef3cd7737acc62cf8a1f98fea6ba81a71d669e6184 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0080117760/cfg.json b/drc33/gobfm3wm/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ec42770f4c336c87a7386c3195963a23748fe844 --- /dev/null +++ b/drc33/gobfm3wm/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0080117760/model b/drc33/gobfm3wm/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..eb7db4a18d8bf243638ee044478bf45105cc837c --- /dev/null +++ b/drc33/gobfm3wm/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc4a34b8c50e9a7ff065193351c32f2ab37199b4895977f3f8da1bae386afe4 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0090132480/cfg.json b/drc33/gobfm3wm/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3f76e2094ad865ddf9e0199437dd33303cf7a521 --- /dev/null +++ b/drc33/gobfm3wm/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0090132480/model b/drc33/gobfm3wm/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..43a6958b79869860bfc0ac1beb6a783f6dc57e9c --- /dev/null +++ b/drc33/gobfm3wm/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76fb5024c4b41e80fafd7dc008c2dc0bd3500563f6762400a7bdabbaea36eb79 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0100147200/cfg.json b/drc33/gobfm3wm/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..910c8f4b900d09c9ae0827ae6565fddc0fbe50d1 --- /dev/null +++ b/drc33/gobfm3wm/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0100147200/model b/drc33/gobfm3wm/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..8ac7a106667e353ae2108dfe088cf1c1b6954929 --- /dev/null +++ b/drc33/gobfm3wm/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2f9a07598c8c4d2c5304e59b6db2af88673fdcaac3007edd6e1252c8479a3f4 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0110161920/cfg.json b/drc33/gobfm3wm/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9137ea73a4ecc75aea43fd038d9d256dae02208a --- /dev/null +++ b/drc33/gobfm3wm/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0110161920/model b/drc33/gobfm3wm/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..942c4bada7a8e847a13b8474b6ed8386f045a1ca --- /dev/null +++ b/drc33/gobfm3wm/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f889334dc1cfb71503973a21e2632bee52ee137685405cf688b8e40ae732e26d +size 20566089 diff --git a/drc33/gobfm3wm/cp_0120176640/cfg.json b/drc33/gobfm3wm/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82db737236678b6e61f681f3a97d42dfd5b9213b --- /dev/null +++ b/drc33/gobfm3wm/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0120176640/model b/drc33/gobfm3wm/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..8e4fed45e082e07387e294f0fe68c409350b7f43 --- /dev/null +++ b/drc33/gobfm3wm/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d473ba9d54d01a4c1aba1fd9306f3e6310abffd9fb215c082faac449ca8cf82 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0130191360/cfg.json b/drc33/gobfm3wm/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4180c80611c0b6dcf28600641210963bd600eb80 --- /dev/null +++ b/drc33/gobfm3wm/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0130191360/model b/drc33/gobfm3wm/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..91d9379c81fe484f5f7e657106af6ef85b309c32 --- /dev/null +++ b/drc33/gobfm3wm/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f209ea62783dcff0efa978ee427fd6a032ac7e435ea2e1a2da66b470fad7d2 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0140206080/cfg.json b/drc33/gobfm3wm/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4a41fe81cf0917ba71d0640020e4591ba173f971 --- /dev/null +++ b/drc33/gobfm3wm/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0140206080/model b/drc33/gobfm3wm/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..b6e2724613624138ff33f6f93094132f5a92c6c6 --- /dev/null +++ b/drc33/gobfm3wm/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f50f91a1102936e209da22552f3a92a430957f6ab88e234bf91b40c0fd73b6dc +size 20566089 diff --git a/drc33/gobfm3wm/cp_0150220800/cfg.json b/drc33/gobfm3wm/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c90cca21dc99acf0d507bce0e2e62a0c2df605eb --- /dev/null +++ b/drc33/gobfm3wm/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0150220800/model b/drc33/gobfm3wm/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..8795492b37af310fbb80e320b6f69f1ec374cb8e --- /dev/null +++ b/drc33/gobfm3wm/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eef0654eb6874c9ddd876305629538069e0bf7481ef38d01eafa187acb89eb0 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0160235520/cfg.json b/drc33/gobfm3wm/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..326eb7f7caa4bd1f711b5e77ef43e609e400d68c --- /dev/null +++ b/drc33/gobfm3wm/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0160235520/model b/drc33/gobfm3wm/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..9c47318b1e5011b18455268058dc47119d542cff --- /dev/null +++ b/drc33/gobfm3wm/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e08ab25fc9736eaa70e495dc9df34b2d5cbcc96f30489928fd3941d54e3c4ba +size 20566089 diff --git a/drc33/gobfm3wm/cp_0170250240/cfg.json b/drc33/gobfm3wm/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..58d93dda84a5b0dd41fad5b27d99ceba46288f3a --- /dev/null +++ b/drc33/gobfm3wm/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0170250240/model b/drc33/gobfm3wm/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..22da555c8407fbd725bf03c5b0b5fabe14afe0ad --- /dev/null +++ b/drc33/gobfm3wm/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba562c1e391292a1bac1abe6c9598ecd4e1ed48f85d40734212e275ff4257d28 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0180264960/cfg.json b/drc33/gobfm3wm/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e457e96b2287e9653b659b8dfa66df0a78ecbe99 --- /dev/null +++ b/drc33/gobfm3wm/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0180264960/model b/drc33/gobfm3wm/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..4d7bf3917b03972bffd2f1577e469f53dc598dea --- /dev/null +++ b/drc33/gobfm3wm/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e61ff4376b9e6f9726f0b82831d3d2731821e1379e5cd83c7c54af50dfad6a13 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0190279680/cfg.json b/drc33/gobfm3wm/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..13f50d45004dc6d9ef0039b25bbbc57e7cb943a1 --- /dev/null +++ b/drc33/gobfm3wm/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0190279680/model b/drc33/gobfm3wm/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..c9e1e11babb2bef46e9ff61f52262c264a4a3511 --- /dev/null +++ b/drc33/gobfm3wm/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ec4fdf7173cb7aab93c9cd96d69d595f0d07afd9ce7519e59d1c563f0d067d7 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0200294400/cfg.json b/drc33/gobfm3wm/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..39dd7d6ed7c3e7fe41d5c2be6496072d5cbab5e4 --- /dev/null +++ b/drc33/gobfm3wm/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0200294400/model b/drc33/gobfm3wm/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..613f5d466aaaf4e4058c9fad448c1e5f36726959 --- /dev/null +++ b/drc33/gobfm3wm/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66889892f2060e7d9ba1bec720fca65bc2e186e70b5c6b0747950f8dd474b1b7 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0300441600/cfg.json b/drc33/gobfm3wm/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..41611825c0e4e0706600c9596fb4695202c6afc4 --- /dev/null +++ b/drc33/gobfm3wm/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0300441600/model b/drc33/gobfm3wm/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..e20a0cee962a306a3008fcd7124f369c058185eb --- /dev/null +++ b/drc33/gobfm3wm/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bc00ef8113b5459606f70ede6557b4c12381490adf4a6da5c5b5800623ddaa6 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0400588800/cfg.json b/drc33/gobfm3wm/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f091e6f351a1dc0b05c272d3dbb2df4f261faee2 --- /dev/null +++ b/drc33/gobfm3wm/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0400588800/model b/drc33/gobfm3wm/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..f087438748ba11315b67e798eed2a1411b598e5a --- /dev/null +++ b/drc33/gobfm3wm/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdfd25a3001a693dbafe0c39be5f4c642fe994f6b711dad2be0b3fffb34c4c92 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0500736000/cfg.json b/drc33/gobfm3wm/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ef31fb9b51b012e9b542f38e8556e0fe13a8ed8c --- /dev/null +++ b/drc33/gobfm3wm/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0500736000/model b/drc33/gobfm3wm/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..fb1c77c851d294902bf959e2ba0479a81ec54935 --- /dev/null +++ b/drc33/gobfm3wm/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d77e71819a56b0436a99249e9cd83561209786d29c70d2c4342e2acbc4fc8047 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0600883200/cfg.json b/drc33/gobfm3wm/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..93fbf1a9815789b74593128b7e8297d5893c03d8 --- /dev/null +++ b/drc33/gobfm3wm/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0600883200/model b/drc33/gobfm3wm/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..3b0918e638fb6441dfd95ea2323592945ff89494 --- /dev/null +++ b/drc33/gobfm3wm/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1e20808759ac22352feffb5e8eae9482a606a050ec81fd5963663adc3333988 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0701030400/cfg.json b/drc33/gobfm3wm/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3a8853136d664bd35c2ec557461960042785bda9 --- /dev/null +++ b/drc33/gobfm3wm/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0701030400/model b/drc33/gobfm3wm/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..8829eef79dabcca01eccecf69b29ffa09227204d --- /dev/null +++ b/drc33/gobfm3wm/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c446205a0888abeb9beb6f0cb7388c2c19935e2b9f76e8dc02910da886184212 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0801177600/cfg.json b/drc33/gobfm3wm/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..487a293212fc70aa4fe54bed46edc9d0da4167c1 --- /dev/null +++ b/drc33/gobfm3wm/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0801177600/model b/drc33/gobfm3wm/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..94ae3938f9307f46522424d76693f358e3cb3a4e --- /dev/null +++ b/drc33/gobfm3wm/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10f6bcc131e8d49beb547f92adb536bce52b892edb8ba9f77b67f2711c556a06 +size 20566089 diff --git a/drc33/gobfm3wm/cp_0901324800/cfg.json b/drc33/gobfm3wm/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f5a5456cba1f112ef72bad67c5b0b19d5aa9ad28 --- /dev/null +++ b/drc33/gobfm3wm/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_0901324800/model b/drc33/gobfm3wm/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..d00d35b9a295372cc56837264f67d6758bbf3815 --- /dev/null +++ b/drc33/gobfm3wm/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94baba05d1a206f60ee0fa3905ddef1710afbef3c8f4285a868d56a528c0d812 +size 20566089 diff --git a/drc33/gobfm3wm/cp_1001472000/cfg.json b/drc33/gobfm3wm/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..80b2cc7610b22a5c22868a2709383f8a348766c6 --- /dev/null +++ b/drc33/gobfm3wm/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1001472000/model b/drc33/gobfm3wm/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..a0c583cd5eb442473f4ca17dfe8094031d6d450a --- /dev/null +++ b/drc33/gobfm3wm/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:642a28ff26aa57ace055ad341086a7e46e897559f7ad18af8527bf701c608fd1 +size 20566089 diff --git a/drc33/gobfm3wm/cp_1101619200/cfg.json b/drc33/gobfm3wm/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..76f0069d0a1807a44aa7fa23ac01b65209881e45 --- /dev/null +++ b/drc33/gobfm3wm/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1101619200/model b/drc33/gobfm3wm/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..62bbd19d04c103c3c406b5e22ccf2e1b79fc7570 --- /dev/null +++ b/drc33/gobfm3wm/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beeadee463dba50d761b4d1b30d8ccfc0d50de61233cb3c9bcc6d6fe6a846b41 +size 20566089 diff --git a/drc33/gobfm3wm/cp_1201766400/cfg.json b/drc33/gobfm3wm/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a88075757d50998758caa22338926111f59d4a15 --- /dev/null +++ b/drc33/gobfm3wm/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1201766400/model b/drc33/gobfm3wm/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..027b4e49117a2dc084c5874289d37c02879a4d86 --- /dev/null +++ b/drc33/gobfm3wm/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5014f47ea437ee04ccd6bfeffc4e15d0fbfee8948bb15b21390b5d39f2ca52 +size 20566089 diff --git a/drc33/gobfm3wm/cp_1301913600/cfg.json b/drc33/gobfm3wm/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ebaac5d189e0004883a99a419fed454fcadbfa20 --- /dev/null +++ b/drc33/gobfm3wm/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1301913600/model b/drc33/gobfm3wm/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..d943dbdeaab5bc4eb19457088380cb11ba09a48f --- /dev/null +++ b/drc33/gobfm3wm/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a5de83c023ca2d4db78e80a1bdf64d0c197e3ba1c7a00aff0e48e0b7d6a4d7 +size 20566089 diff --git a/drc33/gobfm3wm/cp_1402060800/cfg.json b/drc33/gobfm3wm/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b21ed9a24583ed82e56316db9da69cc1f2fcd919 --- /dev/null +++ b/drc33/gobfm3wm/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1402060800/model b/drc33/gobfm3wm/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..f17b9c227d164f27c42abc3771f9ead5ee300096 --- /dev/null +++ b/drc33/gobfm3wm/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b754ce960e71ddaf8451468f0bfe45ffd7e229061412329d977a961fa5f2d07 +size 20566089 diff --git a/drc33/gobfm3wm/cp_1502208000/cfg.json b/drc33/gobfm3wm/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f42338f11219605621e3a25e82a7b07c79d616cf --- /dev/null +++ b/drc33/gobfm3wm/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1502208000/model b/drc33/gobfm3wm/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..4b8e2f71f4df282e723de8f92e080040a51258da --- /dev/null +++ b/drc33/gobfm3wm/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079f6052937ddd727f78d5903d1e89a8198d90a6876e729cac580e756de1da21 +size 20566089 diff --git a/drc33/gobfm3wm/cp_1602355200/cfg.json b/drc33/gobfm3wm/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c040dca9946fd75d8e5ce3baa1e05d4195bf92af --- /dev/null +++ b/drc33/gobfm3wm/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1602355200/model b/drc33/gobfm3wm/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..c4fb491d7d0aba4adaece659a62f4020e23807a6 --- /dev/null +++ b/drc33/gobfm3wm/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2506e9e0c4d15bad7d0e15e6654f5575d01d293e96e1efdd26e8e822947852c8 +size 20566089 diff --git a/drc33/gobfm3wm/cp_1702502400/cfg.json b/drc33/gobfm3wm/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9952bd2c9697079a9f2084fa6c4ee57db85016f0 --- /dev/null +++ b/drc33/gobfm3wm/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1702502400/model b/drc33/gobfm3wm/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..90efd446505fce5ceb309216e8ef3a28caf4c837 --- /dev/null +++ b/drc33/gobfm3wm/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ae63d39202394aecdccee8618a400b0dfc6bf6db6484969ae994f12594670c +size 20566089 diff --git a/drc33/gobfm3wm/cp_1802649600/cfg.json b/drc33/gobfm3wm/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f0a90ca53651e61f81849461eaf19c6cd94c3222 --- /dev/null +++ b/drc33/gobfm3wm/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1802649600/model b/drc33/gobfm3wm/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..2c718cdeb27d94ee9ec7c246da7f6c7feeb0e981 --- /dev/null +++ b/drc33/gobfm3wm/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c2c194738fde9e8ed37530084a36d3f1a7a2c3db2fcf1e1a021b10e57abaa8 +size 20566089 diff --git a/drc33/gobfm3wm/cp_1902796800/cfg.json b/drc33/gobfm3wm/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..09dc1301387ed87421c69f227f2517b53dd103da --- /dev/null +++ b/drc33/gobfm3wm/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_1902796800/model b/drc33/gobfm3wm/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..7af2475be0976420367a851ffbb0e3407f9c690a --- /dev/null +++ b/drc33/gobfm3wm/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30d24db676da0c805c32fafad33de56b150ef248d22b3c8dc9b1e6b0b25527b1 +size 20566089 diff --git a/drc33/gobfm3wm/cp_2002944000/cfg.json b/drc33/gobfm3wm/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..78cc204b59ffb4b2ddf2cf3b8c018e1cd299449f --- /dev/null +++ b/drc33/gobfm3wm/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc33/gobfm3wm/cp_2002944000/model b/drc33/gobfm3wm/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..f8027acb1dea27a753184886b58b59711bfb7637 --- /dev/null +++ b/drc33/gobfm3wm/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b6013e3b49b3eeb54670cd99f70accafe7eb766cf0dc672bd0bf1afc2cf5fca +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0000998400/cfg.json b/drc33/jl6bq8ih/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e70d91be3b0ed184bc90fd6ff5d4656498f1fa52 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0000998400/model b/drc33/jl6bq8ih/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..974459a3271ed4113a4a566a7f4cef5d6778cb42 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3524c1644c90a309d8e5eb43c4e9bb14ca5805d3b2d738d9bd3b810b212048c +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0002001920/cfg.json b/drc33/jl6bq8ih/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ef30bcbb6d77929362282459b1c83cb3d3fe9996 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0002001920/model b/drc33/jl6bq8ih/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..4d285331c63538418452240258fce0772f961182 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81056cd22364a52487fdffc169353910c1fefd5752f1c75ed5a5db96d52cad91 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0003000320/cfg.json b/drc33/jl6bq8ih/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..35998dd8e5c7362dbab8bbc684cda47f42e3f02e --- /dev/null +++ b/drc33/jl6bq8ih/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0003000320/model b/drc33/jl6bq8ih/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..2ff7291f858b3e56865dff1004426dcc08f0689b --- /dev/null +++ b/drc33/jl6bq8ih/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4eaddb576f1ac78d665dfd157b6dff4ec2d78a8cd0a5abf2ef5080b3e27162 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0004003840/cfg.json b/drc33/jl6bq8ih/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bad7646266e1b501fd283b31201cbf1d782f8e66 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0004003840/model b/drc33/jl6bq8ih/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..b2cfc6d1cb73b147244b1a8cf0b132be125d21f4 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe8c7e856f59a01b34d823d5eb37b002507f0a880c881fc8bf25420a479a4cd0 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0005007360/cfg.json b/drc33/jl6bq8ih/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77ec34c9fe9313b2e6cb10bfde96493c08a9ad3e --- /dev/null +++ b/drc33/jl6bq8ih/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0005007360/model b/drc33/jl6bq8ih/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..63c9c10ccfd04dc4bf474020b9a2985b14ac6f90 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e90466532ad27a08e80301c81f41c6be4e5eef1acebff92a99cafb188c54a104 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0006005760/cfg.json b/drc33/jl6bq8ih/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..089c2d9d719d0a86c07c3e22e6a3a1e9f70fb3f5 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0006005760/model b/drc33/jl6bq8ih/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..57569e5b932eb15437426e85b90ed0c9701b3755 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c3e363ab26d61d3e634204511639492d0f38c9067fe2991a7420a8cecc348c9 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0007009280/cfg.json b/drc33/jl6bq8ih/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..705ffd54683e4a2ba6730c226c674c82d0717d08 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0007009280/model b/drc33/jl6bq8ih/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..99c673f2f3896ba855d9c0eb3cd50f9aaf25aabc --- /dev/null +++ b/drc33/jl6bq8ih/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bed06dbbbf1f009bd259594e10efec8e7b8073244311b49bf922ee5ea066cac +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0008007680/cfg.json b/drc33/jl6bq8ih/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0afb0219b68f3a272df912ff88a8514448b55a68 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0008007680/model b/drc33/jl6bq8ih/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..2334178bc3dc5b8dfeb2317ef47c19b379a4cf64 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:248bb8ef15d65796acfdb8d832e69b832b74c550962a264291fb456778d302f0 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0009011200/cfg.json b/drc33/jl6bq8ih/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f5b02446a6e42207ff8f01ac277810cc7aae0a5d --- /dev/null +++ b/drc33/jl6bq8ih/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0009011200/model b/drc33/jl6bq8ih/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..eda5cc389f4718d46f23cb95f518e904518367ab --- /dev/null +++ b/drc33/jl6bq8ih/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003c828eea71cd8bdc180505eaa7e239c7945bb40b02773950f24832fe95d668 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0010014720/cfg.json b/drc33/jl6bq8ih/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2904ce1437329339519396cab9b5b81d947ab53a --- /dev/null +++ b/drc33/jl6bq8ih/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0010014720/model b/drc33/jl6bq8ih/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..5a0c8843d3c4a8955ecbf95b89cb959b8d8544dc --- /dev/null +++ b/drc33/jl6bq8ih/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c37946539c6c3785588fc650fa399241c32206b2dec3d510d6cd0b77f34709ff +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0011013120/cfg.json b/drc33/jl6bq8ih/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5c7ded8ee914b24704f9d761bdc6d93a5ca9281b --- /dev/null +++ b/drc33/jl6bq8ih/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0011013120/model b/drc33/jl6bq8ih/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..397bf5fedb8e84fdb6620a3fbbdbb41e39b281ed --- /dev/null +++ b/drc33/jl6bq8ih/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed08d7300d7668901fab282d61a5d0ed31f18ab42e070c00e4a99e5e14bb5455 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0012016640/cfg.json b/drc33/jl6bq8ih/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f145202272f70f6ea19e995b42f45496f3fc1524 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0012016640/model b/drc33/jl6bq8ih/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..ac15ffe96a1fd66dc839ffd213bea249d768889c --- /dev/null +++ b/drc33/jl6bq8ih/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196a825a2040f45f39de6b429e0d2b6742e854aa3dfdacd2d8b1165d8d3ce1f3 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0013015040/cfg.json b/drc33/jl6bq8ih/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..83a1c1edee73f3df12352f46f65f90ef38a4b976 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0013015040/model b/drc33/jl6bq8ih/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..e5679cfb5c7c3f95a871f46f205683b454158546 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac48cacea17453b1a8053e188a2be075710b5b03971dea18ddcb2af7117e9760 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0014018560/cfg.json b/drc33/jl6bq8ih/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a4eab725c5cf963b533bf5d4a13eb9db72e062da --- /dev/null +++ b/drc33/jl6bq8ih/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0014018560/model b/drc33/jl6bq8ih/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..509a8a8270eb1fdf036709b2bba0f7ccc1cc14a1 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcc12e78ed9ca5703b4b8dae46991b92fffa009e09885e4200928741ba71381f +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0015022080/cfg.json b/drc33/jl6bq8ih/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..51b78615ced0d4691c9b45639319cd075fd939bc --- /dev/null +++ b/drc33/jl6bq8ih/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0015022080/model b/drc33/jl6bq8ih/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..dc9adaf4379ac3dbcf2f49bb79e6899617be6996 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5597e208de3e43cb109e1d77d00b8c507b34f2f0707b2edc5c7918bea202eaf +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0016020480/cfg.json b/drc33/jl6bq8ih/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1fee0e6acb46846cbd95102b3ea49ffdcb1d90cf --- /dev/null +++ b/drc33/jl6bq8ih/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0016020480/model b/drc33/jl6bq8ih/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..06c55224b8df297b7550789a1cc6126cc6572e73 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d4cd32d5d06a2e9b9cb313fdccd7a8e5aa9b0476247b6b9fbbebaf22c8a68c +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0017024000/cfg.json b/drc33/jl6bq8ih/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..49b96a3c483778f3c63ab346b1c6197a7fc63ecd --- /dev/null +++ b/drc33/jl6bq8ih/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0017024000/model b/drc33/jl6bq8ih/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..f5b4099ac809e04732c2d47269e71bcfa7735fac --- /dev/null +++ b/drc33/jl6bq8ih/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c50eb81cf40bf244d742bc07077c18a383cd597d2e28f28a3d2529fb9822258a +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0018022400/cfg.json b/drc33/jl6bq8ih/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..890fe1bf58fe9c5f85875a4081588a49456ec29f --- /dev/null +++ b/drc33/jl6bq8ih/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0018022400/model b/drc33/jl6bq8ih/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..b9a99a8441a778fc1c2e368f0ada568ad6a1c4aa --- /dev/null +++ b/drc33/jl6bq8ih/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0ae6941ef4047cae9f8eed30cbdba3c4f5af84d442fb8502bb0a2dec51fa19 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0019025920/cfg.json b/drc33/jl6bq8ih/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1164d77e565532439362675b8ef4fad75fd57ecc --- /dev/null +++ b/drc33/jl6bq8ih/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0019025920/model b/drc33/jl6bq8ih/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..bc9fae43d59443cd7b7e3088acf6f51b916f53f4 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1362883e6aec40a24926680db68ed532029886a0a9f92ef0fe76932a731c50d4 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0020029440/cfg.json b/drc33/jl6bq8ih/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f0572cbd51044c13c2aa200321061f76cd499e61 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0020029440/model b/drc33/jl6bq8ih/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..740b5c048b808e010aaeaa09b09b0fceada3857e --- /dev/null +++ b/drc33/jl6bq8ih/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b3fc9a9a464b2041bba7a3b3dfa32e16ec65023bc32dc89576dd07a95886f2c +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0030044160/cfg.json b/drc33/jl6bq8ih/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d4f88448264bd2634fdaa50459779a52572b05 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0030044160/model b/drc33/jl6bq8ih/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..d1a7cb63f798c442114f6b510f86dc4870fe92e7 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ea8caac68fa4a6fe7d222aa8344b2b011a9a9f8fcb0a20a57f22d2bbfb49f8 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0040058880/cfg.json b/drc33/jl6bq8ih/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f5d6544709189172087c880cbaad38417620156e --- /dev/null +++ b/drc33/jl6bq8ih/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0040058880/model b/drc33/jl6bq8ih/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..033c42f2dff0375d68df8473e60c7f5fc2772829 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85eae9275b7840f1b48fc5d6e3c569f95b13ae7d062c3d21cbfe390adcb6cc38 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0050073600/cfg.json b/drc33/jl6bq8ih/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1b3b61b14c216712f21825a1c9c932f7d8774137 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0050073600/model b/drc33/jl6bq8ih/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..8f7a7fbce523e8b5d13645173084381f62f30857 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aaa320b59a91d31347fa2fb5ac304b235d4e7c4d8d3c2f09c7866601a46f840 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0060088320/cfg.json b/drc33/jl6bq8ih/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8258297f65e219fe4a0ffc7d07ddc906179f4aa2 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0060088320/model b/drc33/jl6bq8ih/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..fc0939c609c2310ba36e71f481711e100bbc0989 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5cb13c9b4c1476ad8174d3b9272f4546ef63ffc47914c388a7fc225aaaa4300 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0070103040/cfg.json b/drc33/jl6bq8ih/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d477f1a1fda00f7e55c4e0fe21d12ff58f43dc33 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0070103040/model b/drc33/jl6bq8ih/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..993d26883cbfb7aecd90f5faebda9e549f36bebd --- /dev/null +++ b/drc33/jl6bq8ih/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1012db5d9ffcbe561c1d20bffe9ae7f1ec1c5f4c4e341f4b271e6a154187b4b6 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0080117760/cfg.json b/drc33/jl6bq8ih/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2011a3c214ef2c9f6f80e7cf31bcd7e9f6eb702b --- /dev/null +++ b/drc33/jl6bq8ih/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0080117760/model b/drc33/jl6bq8ih/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..12d1ed4a420eda98e8554da499ec6163f76b4bbf --- /dev/null +++ b/drc33/jl6bq8ih/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c582f04c60b774a5af7dfdccb38fef2c9a6f24b8ef714dbaced209788e39f547 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0090132480/cfg.json b/drc33/jl6bq8ih/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..768eeb5f1db9df5fc2f251940939185a38672c04 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0090132480/model b/drc33/jl6bq8ih/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..d3cbf72d7e10ce8a5d8457deaf763b75eb657a35 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f6971a43e64accadc278cb69155cbcff62ca5d9252396b3c48b7f46ad895f33 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0100147200/cfg.json b/drc33/jl6bq8ih/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..607dc32b67e2b02117cdfc82e0089a371310f5fa --- /dev/null +++ b/drc33/jl6bq8ih/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0100147200/model b/drc33/jl6bq8ih/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..ed5bd22c17ec6739731ee0156db1c5c0d803d592 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd24466b1bf1d587bfe60647877a5059f03b47f8d7a69ce0f38720deb2fafef7 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0110161920/cfg.json b/drc33/jl6bq8ih/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..aa84ba6141db2977312cc088bfc9d064230ea0fe --- /dev/null +++ b/drc33/jl6bq8ih/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0110161920/model b/drc33/jl6bq8ih/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..cbac9477e122d2d4b53f4cc17a4271ca4b05b2d7 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7481c5072f528b8be6a21d6830d6fe35d804383eb966f6214362b60957e5b77b +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0120176640/cfg.json b/drc33/jl6bq8ih/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dc4c8ccc1312e051ef864de470f0b2146a45f357 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0120176640/model b/drc33/jl6bq8ih/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..f1a0e2a560488db04dfc9b5c5cce7f7b0a2deefd --- /dev/null +++ b/drc33/jl6bq8ih/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff25dab3705dc2fcdb864aac66a22ccd26060978f594b51f59a4f73e1201db31 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0130191360/cfg.json b/drc33/jl6bq8ih/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..414108df0922f86009c222e9127916ef6bf9cb58 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0130191360/model b/drc33/jl6bq8ih/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..c9242cc612018412766e2608afafc7f1e0f35705 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c06d8a32eefe825c8412c7de893ec1bcb0eaeee7151a3cdbccbaece11a4dc886 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0140206080/cfg.json b/drc33/jl6bq8ih/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6d23b4881e85d2b3019dd0464915ca5c45176970 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0140206080/model b/drc33/jl6bq8ih/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..f7c95688d916d57f0a8f3f6933b1ae086c154228 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f972ad5ffc707682eef8c79d56bfe7cb16e3b716fc6477667cbf73450a64c258 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0150220800/cfg.json b/drc33/jl6bq8ih/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e50c9f9a39f7dd9c97102f24fd3f301d1cde77ba --- /dev/null +++ b/drc33/jl6bq8ih/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0150220800/model b/drc33/jl6bq8ih/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..670ee19eaa86a3218168966269f0d0e0ebb9dba8 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f10b9e0c644f0773311bc9bf4ce724ffe4c7cb160349a15edf16bf2de990494 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0160235520/cfg.json b/drc33/jl6bq8ih/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..485b910edd68eeb6ddf801e52313e01330dd0f97 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0160235520/model b/drc33/jl6bq8ih/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..4efeb01b4ade046f7131f83335b78497b51b90af --- /dev/null +++ b/drc33/jl6bq8ih/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72acbed7cff598c41129e183ee7e8d64177d84243f8fb3df29dc0c5c26b9a29a +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0170250240/cfg.json b/drc33/jl6bq8ih/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..35cfe4a8d2facf70d4acac19e77f971715b4f27b --- /dev/null +++ b/drc33/jl6bq8ih/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0170250240/model b/drc33/jl6bq8ih/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..878d8e394e858c34ced732dd6ad8e3310469567b --- /dev/null +++ b/drc33/jl6bq8ih/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7849238d84f5d6a2810955fe2f975e4b0f39e80e2ac7e28d03a39958c9d166bd +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0180264960/cfg.json b/drc33/jl6bq8ih/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..41324aab5a7b6b9404ab781f83e9be8836c33fe5 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0180264960/model b/drc33/jl6bq8ih/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..f8ddb97f79c590a0aaff6674bae34a6ecf73db55 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f05ddfd8c7559b9e4aa2bccc03d1b24cd4741100a1c92dc2031eef0c3bbe6b +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0190279680/cfg.json b/drc33/jl6bq8ih/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fd64bc32375763dcde9d1afb9f9cdbc976308650 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0190279680/model b/drc33/jl6bq8ih/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..e816f338924f4092001383ed3be680164d0cdb0a --- /dev/null +++ b/drc33/jl6bq8ih/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d6ff22a12077c73607de5589377d73fad86340c39453c417337cb708c436a7 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0200294400/cfg.json b/drc33/jl6bq8ih/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6fc4cd19336edf3395539578f5cc9455d8f21cf4 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0200294400/model b/drc33/jl6bq8ih/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..db8e82085b41882080333be3175ba6b57c83d2fb --- /dev/null +++ b/drc33/jl6bq8ih/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6801b6f90335669235624c899394b0b53c7df2f2a2dae9733313e031902f9abd +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0300441600/cfg.json b/drc33/jl6bq8ih/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b5cd16a7616124014c9801438bdd3548f68c4b33 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0300441600/model b/drc33/jl6bq8ih/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..ee9b458b332e88c1d6a61cdbec7c89704213bc85 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7e86787933af503662ffff21e49bfbac3c3fdd76576baf38f4a1b829e02eb4 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0400588800/cfg.json b/drc33/jl6bq8ih/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5f3b30c7489b02e0fb7a840824198d8adaba4c66 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0400588800/model b/drc33/jl6bq8ih/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..6ec673f5284050a6b6bd8fee4bbdbd532c506bc3 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c01583ef6885d60aff4b3f7d4478c3ed20978af92add81b03fafad0927f0c5c +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0500736000/cfg.json b/drc33/jl6bq8ih/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d13d2ef92601f33b283e813adc1a3a57e46918bf --- /dev/null +++ b/drc33/jl6bq8ih/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0500736000/model b/drc33/jl6bq8ih/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..6952a0a4638fdc3a05df92ce9e552d3b39414282 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4d8ff0b1e7594aa9e847c6f63ea98fd4e6969da44a7e875a6d2255df42ad8b0 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0600883200/cfg.json b/drc33/jl6bq8ih/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5b6c0909b3ae9e36df88790a851b923591bb51e1 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0600883200/model b/drc33/jl6bq8ih/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..70579ec2e9def87d61261e872294d027eef01fde --- /dev/null +++ b/drc33/jl6bq8ih/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5b2991fa4c9bf3e51d699fbbb5b1de9e8c6e158d7afa283a70860475f9f108 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0701030400/cfg.json b/drc33/jl6bq8ih/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1882bda413d57b158a3cc1fdadf11633bdfa5d81 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0701030400/model b/drc33/jl6bq8ih/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..469af437732926b11282b586ffaf31c35ccf1bb2 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:723d2d2de2d16b3f8f77541def5199e722d186b3650bc282ccdd0daf083f7d09 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0801177600/cfg.json b/drc33/jl6bq8ih/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a69bf418df61b71281cd04ff4d4c939ca03dcf09 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0801177600/model b/drc33/jl6bq8ih/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..4c8b24f7c8a2b23efb1dd44f8ec798d794d19035 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fe0ff3e3ba34c12059dfd91aa732b3d7a2a19d72180caaa2dcab86da771aa0f +size 20566089 diff --git a/drc33/jl6bq8ih/cp_0901324800/cfg.json b/drc33/jl6bq8ih/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b8f55357609748dfe8931cd1597d89b56a484992 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_0901324800/model b/drc33/jl6bq8ih/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..7f537c4b8f665d5d79b1806182e446144dd82375 --- /dev/null +++ b/drc33/jl6bq8ih/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd1aed318f5332b843219f5e1047ab3847ecf48ae67cbf538ba50e3b9fb0a2c +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1001472000/cfg.json b/drc33/jl6bq8ih/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b64bb44fff2bc1894ffd8c41463811e7e690e0 --- /dev/null +++ b/drc33/jl6bq8ih/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1001472000/model b/drc33/jl6bq8ih/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..862680781ef8403b0e59b584c1c7804770615d88 --- /dev/null +++ b/drc33/jl6bq8ih/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d952f74a16b35a36a3a82b3d610adef3fdb851f8ade6c811350b8f1b55dae5e +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1101619200/cfg.json b/drc33/jl6bq8ih/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..73e05e31a3f0fe1789b4344859d706245395c8ea --- /dev/null +++ b/drc33/jl6bq8ih/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1101619200/model b/drc33/jl6bq8ih/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..2cbb5e4149b53097f881ddec0b4f370fffb5d35c --- /dev/null +++ b/drc33/jl6bq8ih/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ee6a7b4fa64690678b3f66975e24857f8f9ea28b7b767f86de392002293bd7f +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1201766400/cfg.json b/drc33/jl6bq8ih/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..438c18c13e7e7e2cb36968f3eef51354a4539396 --- /dev/null +++ b/drc33/jl6bq8ih/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1201766400/model b/drc33/jl6bq8ih/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..7cb28a5ee9e7afe99410d53a60762ea7145ee37e --- /dev/null +++ b/drc33/jl6bq8ih/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a0621f622323bff61a124d358c72bfddbe0333e61dc85a6b7b22f84d120812 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1301913600/cfg.json b/drc33/jl6bq8ih/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5d129b2e0ba6e854623a5175a194cfbdbda6a549 --- /dev/null +++ b/drc33/jl6bq8ih/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1301913600/model b/drc33/jl6bq8ih/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..7ca8b8b0cd29ff4eda79318d666cb87a924eae93 --- /dev/null +++ b/drc33/jl6bq8ih/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7635a38160ba1c8647ae3bcaf915b99c246b82adf0ea1c0daec31d0ceb7e4fbc +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1402060800/cfg.json b/drc33/jl6bq8ih/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a8ef498c28f0219b4c0f724f441b65138e89287f --- /dev/null +++ b/drc33/jl6bq8ih/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1402060800/model b/drc33/jl6bq8ih/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..e76ea864fa3a37775617e6a70d46a29392c2c389 --- /dev/null +++ b/drc33/jl6bq8ih/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec602c64f62dfbdc3d2bfc32abce10a05c3fa99a71cf7a8faf35ffcb9da1d02d +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1502208000/cfg.json b/drc33/jl6bq8ih/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c7954927724792719fb0b2327cd35640d0684540 --- /dev/null +++ b/drc33/jl6bq8ih/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1502208000/model b/drc33/jl6bq8ih/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..4b5f40154a23bf97893c7aabc23d445e69f4d3de --- /dev/null +++ b/drc33/jl6bq8ih/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a130bcc6a49bd612b30882a4ac7a4f909818ea5238fec74e8f9fef18e455e00 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1602355200/cfg.json b/drc33/jl6bq8ih/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4a20161df26f5c2e3ca7d5ba3e8d70729dfe0d1a --- /dev/null +++ b/drc33/jl6bq8ih/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1602355200/model b/drc33/jl6bq8ih/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..4b1bd76e166c1108f590f38d487b4dbb5bb7cf30 --- /dev/null +++ b/drc33/jl6bq8ih/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:125cf98346e8da8299c4a436f32af570b201faeb87afd5260cd6477ca05302a4 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1702502400/cfg.json b/drc33/jl6bq8ih/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b2e7037a21cea051487b1c3fa9df6b80b4d52906 --- /dev/null +++ b/drc33/jl6bq8ih/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1702502400/model b/drc33/jl6bq8ih/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..e58219983422f243a5b9f1d511bb5bb8da3dcf6f --- /dev/null +++ b/drc33/jl6bq8ih/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:921cd3a081851a932ebe29b1c7ee1125857f776fcff6c0756599775363ca69ab +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1802649600/cfg.json b/drc33/jl6bq8ih/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fcfdf1b83e37a028b2f4df82f6abfbae2998ff9d --- /dev/null +++ b/drc33/jl6bq8ih/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1802649600/model b/drc33/jl6bq8ih/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..ac9604c9f9bb026716cf6af68de632779f42646e --- /dev/null +++ b/drc33/jl6bq8ih/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f9873d72f121983fbd59df56fd91f6f7faabc991d5514300b940fdd76c4812 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_1902796800/cfg.json b/drc33/jl6bq8ih/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2e90e8d55d0361bb155428281ac91ee9470fad2a --- /dev/null +++ b/drc33/jl6bq8ih/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_1902796800/model b/drc33/jl6bq8ih/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..d25a6ea18959c879d50efb3890a56413095607cb --- /dev/null +++ b/drc33/jl6bq8ih/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3752c4d808b07d7ea5465aff9ad918ec5bc357c10bd30a6986ef4cc32ea2ff7 +size 20566089 diff --git a/drc33/jl6bq8ih/cp_2002944000/cfg.json b/drc33/jl6bq8ih/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8f9256d17ed51faf78ea76abdf6b785b432230e4 --- /dev/null +++ b/drc33/jl6bq8ih/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc33/jl6bq8ih/cp_2002944000/model b/drc33/jl6bq8ih/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..b643af30d01ad6f48a0e379a1200650f80e16df4 --- /dev/null +++ b/drc33/jl6bq8ih/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7622ab57c7412b4cd3ee17c9ae2fda742b62f5b84efcfd538ba57d6d92c22c +size 20566089 diff --git a/drc33/q4mjldyy/cp_0000998400/cfg.json b/drc33/q4mjldyy/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ae409a58c384f5ef48f725831899d0ee4a03143c --- /dev/null +++ b/drc33/q4mjldyy/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0000998400/model b/drc33/q4mjldyy/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..b379895dbb36b7e7fd801463b8f9000016b6d302 --- /dev/null +++ b/drc33/q4mjldyy/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d876c27413f28bad9f4aa1098a8aad75dc5fe0eedeb9fcb6f68b7cc29a86c992 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0002001920/cfg.json b/drc33/q4mjldyy/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b10703723b4dfc17b867362f443479b0f24be1c2 --- /dev/null +++ b/drc33/q4mjldyy/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0002001920/model b/drc33/q4mjldyy/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..7605a23572e39c4d1b921846881e0b095c4a39f9 --- /dev/null +++ b/drc33/q4mjldyy/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696a619e0dc5c1e79c3b260d4b525374adc174e52c0a8980216cb8cf8eb8e711 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0003000320/cfg.json b/drc33/q4mjldyy/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..eac2784a32c48dbfe1db50f3da7fee973761952b --- /dev/null +++ b/drc33/q4mjldyy/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0003000320/model b/drc33/q4mjldyy/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..b96b62c74d035c04ecea8d1e03f5fce3b716109c --- /dev/null +++ b/drc33/q4mjldyy/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acba612bf81cab63452823b1ff4fc3c56f7b7f42e4fbfb44690f0e4b9fbd7d62 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0004003840/cfg.json b/drc33/q4mjldyy/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dd2a9c1ebb85991f8ca4041e86e2392276851b61 --- /dev/null +++ b/drc33/q4mjldyy/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0004003840/model b/drc33/q4mjldyy/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..83d78af574f9647dd4a88ad8c4daff4632418437 --- /dev/null +++ b/drc33/q4mjldyy/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45674b5f81c145022abc055fef73b032eea784b28f5e4536198b0dcc826952f3 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0005007360/cfg.json b/drc33/q4mjldyy/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2d4894cbd480fa1642ae77ad25c7e9e6d759a0b4 --- /dev/null +++ b/drc33/q4mjldyy/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0005007360/model b/drc33/q4mjldyy/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..f36168058cf8825f37c78df530c1aa1a2bf3a208 --- /dev/null +++ b/drc33/q4mjldyy/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5017656d776bc474fb530db0fa0c773cfa6ca8145c6a9233fd6656ccf21139ed +size 20566089 diff --git a/drc33/q4mjldyy/cp_0006005760/cfg.json b/drc33/q4mjldyy/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..30f86ea4454da7989d95205abd67de51474e4c5d --- /dev/null +++ b/drc33/q4mjldyy/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0006005760/model b/drc33/q4mjldyy/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..4c4c7094efd76703fb9d87d42b6f6c3f8ebbbfec --- /dev/null +++ b/drc33/q4mjldyy/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b209db8d2d32a0679a83cd650df6ad3d15036842dd6315a5f7ec303f138e7b +size 20566089 diff --git a/drc33/q4mjldyy/cp_0007009280/cfg.json b/drc33/q4mjldyy/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f2b4441324c2000512f2890d5654d3d55f34a026 --- /dev/null +++ b/drc33/q4mjldyy/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0007009280/model b/drc33/q4mjldyy/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..f973e06b5f1a32ffea95320bec18df418b8ccac1 --- /dev/null +++ b/drc33/q4mjldyy/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfb6e0458ec4e7f66dcd1aa45e641e577c1ba56693ca3974d9ea40d2ce029fbe +size 20566089 diff --git a/drc33/q4mjldyy/cp_0008007680/cfg.json b/drc33/q4mjldyy/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b8b8d4120ba24831ddb5a5582a3b444004ade70b --- /dev/null +++ b/drc33/q4mjldyy/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0008007680/model b/drc33/q4mjldyy/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..36541f1973f2dcbc229a99437c1a77d791ecbee6 --- /dev/null +++ b/drc33/q4mjldyy/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:109f0df956458019a99e196b7563dc851fe728c0b609217375980cb1d58d0057 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0009011200/cfg.json b/drc33/q4mjldyy/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..14e2ea07c053ac09e96178775762dfc8f1a03cf8 --- /dev/null +++ b/drc33/q4mjldyy/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0009011200/model b/drc33/q4mjldyy/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..90fddd3c8b728ad72b07121e9de2746d4566ee2d --- /dev/null +++ b/drc33/q4mjldyy/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e35f1181338b9517c9fd9023aa6bcead710b7952586692747030be9bf1886fb +size 20566089 diff --git a/drc33/q4mjldyy/cp_0010014720/cfg.json b/drc33/q4mjldyy/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fddd715ba1eee4a8cfb2d64804705e511577fc9e --- /dev/null +++ b/drc33/q4mjldyy/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0010014720/model b/drc33/q4mjldyy/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..c88db712418ae8d09922bdc03017d38cb8a8ff57 --- /dev/null +++ b/drc33/q4mjldyy/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a16ed15908c627c06d08d7d793f362596708efaf373dc8a06a34e3454c7e3aa0 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0011013120/cfg.json b/drc33/q4mjldyy/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..af5feb23b0d5e047211e2246272929f725bfb842 --- /dev/null +++ b/drc33/q4mjldyy/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0011013120/model b/drc33/q4mjldyy/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..6386aa088837fb04a2a3eee1817203887030ce49 --- /dev/null +++ b/drc33/q4mjldyy/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47cf780d5de5777824d34b70daa7b2f5ac3ae691150d701fa639e35b2e3249d +size 20566089 diff --git a/drc33/q4mjldyy/cp_0012016640/cfg.json b/drc33/q4mjldyy/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a1e12879943c1cedbc704060e53adab95562528f --- /dev/null +++ b/drc33/q4mjldyy/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0012016640/model b/drc33/q4mjldyy/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..b0e5ddd9436d1d3146b360fdbbc710e44bb2a90c --- /dev/null +++ b/drc33/q4mjldyy/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed9284d1f25aa6d3a2f5f2918f1a1442523260d248f2be79928119d044ee933 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0013015040/cfg.json b/drc33/q4mjldyy/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c9cce780efd72d348a3de7e13b1e543f5ac82201 --- /dev/null +++ b/drc33/q4mjldyy/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0013015040/model b/drc33/q4mjldyy/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..28c9a8623d1bb1640ec0c6c895e5135f0714fe67 --- /dev/null +++ b/drc33/q4mjldyy/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:121da9d2d8d58fad22057dd9e0ae75e2ffda58ffa845c0ea93f6db04846b2891 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0014018560/cfg.json b/drc33/q4mjldyy/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3894d8367ce2d4c5af37b3f9490dfcdf10de8c3a --- /dev/null +++ b/drc33/q4mjldyy/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0014018560/model b/drc33/q4mjldyy/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..04384d9b145053e6a2ff6b4c337f70b4873dd757 --- /dev/null +++ b/drc33/q4mjldyy/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a17bbd3a4213a239dabc2c5f079143a6d02aef6a89333a888a930c02bbe2de8a +size 20566089 diff --git a/drc33/q4mjldyy/cp_0015022080/cfg.json b/drc33/q4mjldyy/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c90020befa18f6443f3adad6e925bfcd37b38fca --- /dev/null +++ b/drc33/q4mjldyy/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0015022080/model b/drc33/q4mjldyy/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..b6f7a8aae0bf444891e084fde3aaca7c2d41b5fa --- /dev/null +++ b/drc33/q4mjldyy/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d55640c109d9c73fcbee385b5bb0d92862179e75da9b5ce0c83b3c15f3a6811 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0016020480/cfg.json b/drc33/q4mjldyy/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5054113c829d44bf9f80d63efd188924ac45d828 --- /dev/null +++ b/drc33/q4mjldyy/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0016020480/model b/drc33/q4mjldyy/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..5946f951d5a1e0ee207fca016c757077fdedd3da --- /dev/null +++ b/drc33/q4mjldyy/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d585c395f41b8c4de662bb9f6ee41363032206c1b3cd99dba77989b27b738ee +size 20566089 diff --git a/drc33/q4mjldyy/cp_0017024000/cfg.json b/drc33/q4mjldyy/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9b0ec73acd751fe90fba283b0afce6120e24d099 --- /dev/null +++ b/drc33/q4mjldyy/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0017024000/model b/drc33/q4mjldyy/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..41acf24e1c0c1e0ec5b0165f8f647b8aadb1576d --- /dev/null +++ b/drc33/q4mjldyy/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b492dc1f1dd79dd40f63e8527abf7c5777d9d98899ea6d3c2a31b6bb873764f +size 20566089 diff --git a/drc33/q4mjldyy/cp_0018022400/cfg.json b/drc33/q4mjldyy/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c079441255d34efe9897ddab70940cd46181d62c --- /dev/null +++ b/drc33/q4mjldyy/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0018022400/model b/drc33/q4mjldyy/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..32a0720a63ea25a9c0849375166ca7cdcd267af0 --- /dev/null +++ b/drc33/q4mjldyy/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd50c8b362cd798c3061c3f3247cb4225b71d48918ae0ea5de9194423756190 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0019025920/cfg.json b/drc33/q4mjldyy/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7fe4b1ce7b49f8ba9f8593f7f47e54b27d6e9c0e --- /dev/null +++ b/drc33/q4mjldyy/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0019025920/model b/drc33/q4mjldyy/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..87e5f4949795691ffbb518fb1155f6906ec8dce3 --- /dev/null +++ b/drc33/q4mjldyy/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e4d1e0696f02ff08b8a36a260ebf936321e071d7b1a45c9574dfe2e54d1769 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0020029440/cfg.json b/drc33/q4mjldyy/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..adbabe3d14cc434d85cd9f1f0c5021dbe93f909b --- /dev/null +++ b/drc33/q4mjldyy/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0020029440/model b/drc33/q4mjldyy/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..97b6b67dbe1fac3ee4f7535f03fbca4c918c3a99 --- /dev/null +++ b/drc33/q4mjldyy/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204f296aa2ba415fecabc55e8e567b48aabae198bd6f5398ec05135be94dc53c +size 20566089 diff --git a/drc33/q4mjldyy/cp_0030044160/cfg.json b/drc33/q4mjldyy/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d23e01173e20e9d489c38258bdbde810ba398b34 --- /dev/null +++ b/drc33/q4mjldyy/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0030044160/model b/drc33/q4mjldyy/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..b29e9bfcaee4246383051ba913a01aa24c811d5e --- /dev/null +++ b/drc33/q4mjldyy/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8213fc00e256b350a4efb29436e23296cddc4f916ab8c8d1dbbfe5ea4d806b5 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0040058880/cfg.json b/drc33/q4mjldyy/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..28b956236077970da6db418a8251443d4377fa7d --- /dev/null +++ b/drc33/q4mjldyy/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0040058880/model b/drc33/q4mjldyy/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..4a78b50c881a8ca641efa0b49a1559a66cbb0461 --- /dev/null +++ b/drc33/q4mjldyy/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:709088f7bf5cefd2847735f407bae70a8dff1521ae4c42cfe8b32cc37ead4397 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0050073600/cfg.json b/drc33/q4mjldyy/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..34db9a7f0d9f3d534bcbb66fe262240e4366027b --- /dev/null +++ b/drc33/q4mjldyy/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0050073600/model b/drc33/q4mjldyy/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..60a1d709433b6ace91e84a5205ecd2cc61d17990 --- /dev/null +++ b/drc33/q4mjldyy/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64205b7e3219b40e1bcaf223cf32f581b3ea72a7c015e5b278874f6766844d65 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0060088320/cfg.json b/drc33/q4mjldyy/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fae2b8fa1e1e5f8bf635fe04f4a7b78f7d619523 --- /dev/null +++ b/drc33/q4mjldyy/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0060088320/model b/drc33/q4mjldyy/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..44ddf2512f830f212262b0227d1abd99ac683680 --- /dev/null +++ b/drc33/q4mjldyy/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5f2a090dccc68b4527dc1dba8ff9dc53a7df96fec8bac77d45f6f828011d21 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0070103040/cfg.json b/drc33/q4mjldyy/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b08ecad89c24d30d67781edf834327fc91443d2b --- /dev/null +++ b/drc33/q4mjldyy/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0070103040/model b/drc33/q4mjldyy/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..0e026b4109872a2cdf63c13dccace45f9d2f6a6a --- /dev/null +++ b/drc33/q4mjldyy/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c333c8c1e6e9aebee3c10f134d3b4a754cf75f3877769680e6a7626e0af70b +size 20566089 diff --git a/drc33/q4mjldyy/cp_0080117760/cfg.json b/drc33/q4mjldyy/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4963769136d7ef2424927c640bfbdc7f1499facf --- /dev/null +++ b/drc33/q4mjldyy/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0080117760/model b/drc33/q4mjldyy/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..110ec26cb6f172f879c52fed78446cd5575631a3 --- /dev/null +++ b/drc33/q4mjldyy/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa843a76140dc9843df3b572681dc844c724dacda84bac0eb64433d872c337f0 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0090132480/cfg.json b/drc33/q4mjldyy/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..17e5283a254d8d643c5f2d0b1edccc02cb8ba9b3 --- /dev/null +++ b/drc33/q4mjldyy/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0090132480/model b/drc33/q4mjldyy/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..824217113b63f58b434eedb3c9867a9d9e096bb7 --- /dev/null +++ b/drc33/q4mjldyy/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe40b2028135c0731995966dff348f79b8a39ce1345180dac7df9287471275a +size 20566089 diff --git a/drc33/q4mjldyy/cp_0100147200/cfg.json b/drc33/q4mjldyy/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cfc3febfac731c2e33c0e4d4858071b7c6ba999d --- /dev/null +++ b/drc33/q4mjldyy/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0100147200/model b/drc33/q4mjldyy/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..9f2646f1abed73516b69ceefcf00940e0a8cb244 --- /dev/null +++ b/drc33/q4mjldyy/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cca583fde5f612768044fde00f6ad21711d57d628c6d0a9cff2af94092c7e63 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0110161920/cfg.json b/drc33/q4mjldyy/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..904464ba187132b57d28e6aa1a5a23df7cb3467f --- /dev/null +++ b/drc33/q4mjldyy/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0110161920/model b/drc33/q4mjldyy/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..43d95182b76db7037d3e3df6a56c66f6dcd91cee --- /dev/null +++ b/drc33/q4mjldyy/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e408846438234aa7d25a5e1a2433c327abb03d79d7a3bb1f6cfc0ef7010ab6c +size 20566089 diff --git a/drc33/q4mjldyy/cp_0120176640/cfg.json b/drc33/q4mjldyy/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8296f2e030735501b4c0eea8c12dfef7b1d5a634 --- /dev/null +++ b/drc33/q4mjldyy/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0120176640/model b/drc33/q4mjldyy/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..e97a7bb487e80e59145563f0ab323ed56237b0c8 --- /dev/null +++ b/drc33/q4mjldyy/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ab3b581b6067d2d01ab0530178a58ab990c51a62ffb2f33afc707d1ccedd6f +size 20566089 diff --git a/drc33/q4mjldyy/cp_0130191360/cfg.json b/drc33/q4mjldyy/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6527368ead4bb12865101b6ff0a6ba6c35c034e1 --- /dev/null +++ b/drc33/q4mjldyy/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0130191360/model b/drc33/q4mjldyy/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..f8fdb9d9e09bfeb1f3f79f5034f90a35f4cec484 --- /dev/null +++ b/drc33/q4mjldyy/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d945c55d62d1630303c23e86ba34c668e7c0092803514897493669c9684a3e0 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0140206080/cfg.json b/drc33/q4mjldyy/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..208510ef80bbcd629be2728a1ea1d3b63c7f54d1 --- /dev/null +++ b/drc33/q4mjldyy/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0140206080/model b/drc33/q4mjldyy/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..5456dc4b9f6811bfcb4c9b0bbd5b35e9bce33af5 --- /dev/null +++ b/drc33/q4mjldyy/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47bdbce0feb374532bbb0d83bbaaf6271ab8300fda61ee77b7aa049b60542567 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0150220800/cfg.json b/drc33/q4mjldyy/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f5d898b32c5d2b92b347f1185bb2e54e139b8ac4 --- /dev/null +++ b/drc33/q4mjldyy/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0150220800/model b/drc33/q4mjldyy/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..45cfd7f0c462d3229e820688fb6d08a2519ccb0a --- /dev/null +++ b/drc33/q4mjldyy/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc535209d8b34f0593c0c015e92bdc7eca0263ff8b0716b9ec7bbb0c505580e8 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0160235520/cfg.json b/drc33/q4mjldyy/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c3773121c50711d0f6b9fde9384646c8f854e765 --- /dev/null +++ b/drc33/q4mjldyy/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0160235520/model b/drc33/q4mjldyy/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..810f2cf7c711de30a08eb6eaa5f9319f6b98e888 --- /dev/null +++ b/drc33/q4mjldyy/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30fd61efbc5d5040272bfecd4aa3002a6da090070ea54771081cdab875c47896 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0170250240/cfg.json b/drc33/q4mjldyy/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..26141f76f276bc187b2293943d83817fc21bb87a --- /dev/null +++ b/drc33/q4mjldyy/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0170250240/model b/drc33/q4mjldyy/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..c4f9a17b38369d571ef68197958d1823de2e4b76 --- /dev/null +++ b/drc33/q4mjldyy/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ef15468bc3e9f3e90e54f48df3760c9fa0a0e4ec0afc0d073061300cf1ca5d +size 20566089 diff --git a/drc33/q4mjldyy/cp_0180264960/cfg.json b/drc33/q4mjldyy/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d9d613a71e6cbb779104552ca6ec26d2ed48ab56 --- /dev/null +++ b/drc33/q4mjldyy/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0180264960/model b/drc33/q4mjldyy/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..7e860e12984fa092800160f6e0a992d4c2581ba3 --- /dev/null +++ b/drc33/q4mjldyy/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7049618f5bc0b9eb9e8f87cf6326cedf6b1bf8a671943314ab7b0af80e94a3f2 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0190279680/cfg.json b/drc33/q4mjldyy/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..20f71d53fd478d7c52c4a39cc95159128d1a201a --- /dev/null +++ b/drc33/q4mjldyy/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0190279680/model b/drc33/q4mjldyy/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..c529f92ef8afc4203886c32edd3ae220c6b5c4c2 --- /dev/null +++ b/drc33/q4mjldyy/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49a2299d0b6b5e06a3c2838afa46c367bb9aff106592b3fc3660f1a055117d31 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0200294400/cfg.json b/drc33/q4mjldyy/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..88b543d246e8f0795683b9da49a0fe2d8c3cca32 --- /dev/null +++ b/drc33/q4mjldyy/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0200294400/model b/drc33/q4mjldyy/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..653dfec430797b9ec0fd505060dab37866a467a3 --- /dev/null +++ b/drc33/q4mjldyy/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa49df1a63053474348e3ea9b9b35708378e61ce145fce24ea333db13fde838b +size 20566089 diff --git a/drc33/q4mjldyy/cp_0300441600/cfg.json b/drc33/q4mjldyy/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0f658e60f9c2d241c2ad96f0bbf991d71d627410 --- /dev/null +++ b/drc33/q4mjldyy/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0300441600/model b/drc33/q4mjldyy/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..b1810dbbaac38acf143eb9ca2f937d570affa925 --- /dev/null +++ b/drc33/q4mjldyy/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e932b5ee715db293c8dc2228efcbc924ae350802cf3a29728ef529d28e80819f +size 20566089 diff --git a/drc33/q4mjldyy/cp_0400588800/cfg.json b/drc33/q4mjldyy/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c715d5e891c808ea1a90b8807ed008e7af47d83d --- /dev/null +++ b/drc33/q4mjldyy/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0400588800/model b/drc33/q4mjldyy/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..742f5600817aa79172c263043dfc5d69a4fe5a7b --- /dev/null +++ b/drc33/q4mjldyy/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fae2b1f81046f3b9aefcbf4dc65312e4734717ef8859a1cb2381b9c4f7ee8e +size 20566089 diff --git a/drc33/q4mjldyy/cp_0500736000/cfg.json b/drc33/q4mjldyy/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d93ebabb0bc7e4a401e8951fee2ddbac45916546 --- /dev/null +++ b/drc33/q4mjldyy/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0500736000/model b/drc33/q4mjldyy/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..b026d46efc7a6b3ab65139cd08a40509dcb59470 --- /dev/null +++ b/drc33/q4mjldyy/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44fe449cc174e00dac37c8b6953b67044496d2062e7b59fb2c475ff58dbbd2b3 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0600883200/cfg.json b/drc33/q4mjldyy/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f6a21468aa8a8398a8407b27f434fda14ca1d431 --- /dev/null +++ b/drc33/q4mjldyy/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0600883200/model b/drc33/q4mjldyy/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..1489b563f46cf53e8c03d5ef70aa5bb95382794c --- /dev/null +++ b/drc33/q4mjldyy/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39d7cfab289e08e1fab76c3037f4253db1228912aef6236886559f0b5c3a458b +size 20566089 diff --git a/drc33/q4mjldyy/cp_0701030400/cfg.json b/drc33/q4mjldyy/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..507025627dca61f4478c04bde4409276e8cd24be --- /dev/null +++ b/drc33/q4mjldyy/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0701030400/model b/drc33/q4mjldyy/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..bf1bec1b3ccefc89c786a11651138782e5ee28a8 --- /dev/null +++ b/drc33/q4mjldyy/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb85cc708f16375c8b926e33b34d74e210baf0646cc5b85a64193387114dfae8 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0801177600/cfg.json b/drc33/q4mjldyy/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..80def77621131918aef0cb9dc5914cf5703c5cb5 --- /dev/null +++ b/drc33/q4mjldyy/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0801177600/model b/drc33/q4mjldyy/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..5c44068a2d901e8dc25bf3e64ab146867a235336 --- /dev/null +++ b/drc33/q4mjldyy/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562c20987ae63551f4e75538d38ec15811ec646dfa28aec6963d1558791852b9 +size 20566089 diff --git a/drc33/q4mjldyy/cp_0901324800/cfg.json b/drc33/q4mjldyy/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0e4b48616e3913f69d5e157b74c9aef16f99946d --- /dev/null +++ b/drc33/q4mjldyy/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_0901324800/model b/drc33/q4mjldyy/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..2e2273b5643535e618c17f88488c2d93b119a07d --- /dev/null +++ b/drc33/q4mjldyy/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f687d63400fabaf534ee7419dc095cf970e0bdbe596ec551703aed4187e09137 +size 20566089 diff --git a/drc33/q4mjldyy/cp_1001472000/cfg.json b/drc33/q4mjldyy/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..722f9994fe2c86db88d6ed7f263dde8df7d79db6 --- /dev/null +++ b/drc33/q4mjldyy/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1001472000/model b/drc33/q4mjldyy/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..1a85a217e6583fa866c1f3883b7b7289c36c1ec8 --- /dev/null +++ b/drc33/q4mjldyy/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb077795dca45be6047e4f18c014a5a803e7e80432b327530abab3599c2f3009 +size 20566089 diff --git a/drc33/q4mjldyy/cp_1101619200/cfg.json b/drc33/q4mjldyy/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..589eab913183c83446c39a208957055e2b4630da --- /dev/null +++ b/drc33/q4mjldyy/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1101619200/model b/drc33/q4mjldyy/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..23d8228309b599d87fdff29755063bff238592eb --- /dev/null +++ b/drc33/q4mjldyy/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07496ed2c0648e8cdc8035b9c643e1c6d420006453fbae32649b123a3c5d740d +size 20566089 diff --git a/drc33/q4mjldyy/cp_1201766400/cfg.json b/drc33/q4mjldyy/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4117d2c7a997820b657c27c3463f22ee49a40874 --- /dev/null +++ b/drc33/q4mjldyy/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1201766400/model b/drc33/q4mjldyy/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..6676df08c32a316dec63553057fd417edee67772 --- /dev/null +++ b/drc33/q4mjldyy/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d220da26b867c0e2c43949e3787d12f38a51002b37fba140c4b7ab4229994c1 +size 20566089 diff --git a/drc33/q4mjldyy/cp_1301913600/cfg.json b/drc33/q4mjldyy/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..72737fa47ba9ddae364b502be8badfcc5a042dc1 --- /dev/null +++ b/drc33/q4mjldyy/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1301913600/model b/drc33/q4mjldyy/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..6f3471f4b23e11c0d8fd32133f2d08f293108f54 --- /dev/null +++ b/drc33/q4mjldyy/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4234c2cdf1df348625ee05c98367afa637ad8008f7ed563a0858b892fdc03c62 +size 20566089 diff --git a/drc33/q4mjldyy/cp_1402060800/cfg.json b/drc33/q4mjldyy/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b895e3555bfb36d5124fa77938e0a4a4c067cf2d --- /dev/null +++ b/drc33/q4mjldyy/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1402060800/model b/drc33/q4mjldyy/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..467cd4b1ce3aecec3c546a46200556507326bbbf --- /dev/null +++ b/drc33/q4mjldyy/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20d6dfbfc51d599e6e27c7154675a70e93d26d39c65c00c9aa6bf48ab8cdfa16 +size 20566089 diff --git a/drc33/q4mjldyy/cp_1502208000/cfg.json b/drc33/q4mjldyy/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..997543895b1def33931700c2eb372dfbaa9a0671 --- /dev/null +++ b/drc33/q4mjldyy/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1502208000/model b/drc33/q4mjldyy/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..e472f38ffbdd7d0bda0a9a951fc008cf63b828e6 --- /dev/null +++ b/drc33/q4mjldyy/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63da20c8b5d8c5178dc36b17c0412bba64dc2313fe310ece116a8d9f3f471bfb +size 20566089 diff --git a/drc33/q4mjldyy/cp_1602355200/cfg.json b/drc33/q4mjldyy/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a3fb7b9562ba1e19ae00377b6b9456bb24eddb7c --- /dev/null +++ b/drc33/q4mjldyy/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1602355200/model b/drc33/q4mjldyy/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..d0a6074d3fdb91ba1d1ad18d01dacd5f2e51a225 --- /dev/null +++ b/drc33/q4mjldyy/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68cbd9943061c410cf2b10a4b976625c7bc85ac7ed040dc0c62acbcc59022657 +size 20566089 diff --git a/drc33/q4mjldyy/cp_1702502400/cfg.json b/drc33/q4mjldyy/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..df6745255cde1a16ca181aa2ca324b0d60a200a0 --- /dev/null +++ b/drc33/q4mjldyy/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1702502400/model b/drc33/q4mjldyy/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..8261b396ff0bc5bf9b99b7720a2ecd979b609c9f --- /dev/null +++ b/drc33/q4mjldyy/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d54bb3158dbe69df0351513a7329c6ab7ba56b385700932c471039d81f0ad02 +size 20566089 diff --git a/drc33/q4mjldyy/cp_1802649600/cfg.json b/drc33/q4mjldyy/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a08375d46cd0dd1d4873ffd6a5b828324aa71264 --- /dev/null +++ b/drc33/q4mjldyy/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1802649600/model b/drc33/q4mjldyy/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..9b77a12e91700067495f42d23dfb8e9c9cc16e94 --- /dev/null +++ b/drc33/q4mjldyy/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8002d60150df2d550c688e8810dda95c8bd975b59541ae4fdb2c3f79130ea5 +size 20566089 diff --git a/drc33/q4mjldyy/cp_1902796800/cfg.json b/drc33/q4mjldyy/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..010f4f04755d34b9481d16038284c5dd9f46b5f8 --- /dev/null +++ b/drc33/q4mjldyy/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_1902796800/model b/drc33/q4mjldyy/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..d77ca6c6140de44df4ceab3bd8fee77b69b4dc7c --- /dev/null +++ b/drc33/q4mjldyy/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ccea47062ebdb72d6affd230204dba7b3938831396dc8e61eba91b5303df905 +size 20566089 diff --git a/drc33/q4mjldyy/cp_2002944000/cfg.json b/drc33/q4mjldyy/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..188d7904396d97a84df2ddcc7a3ecd23191e6b73 --- /dev/null +++ b/drc33/q4mjldyy/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc33/q4mjldyy/cp_2002944000/model b/drc33/q4mjldyy/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..269dc8d84205d4a533d2ed300be5e1d5d5c788fb --- /dev/null +++ b/drc33/q4mjldyy/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e2c594105c4438162f4c49983e2496c5a661d7a7f6ce937b1bfd9861cddf423 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0000998400/cfg.json b/drc33/qqp0kn15/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..148f69e924d9616037433d2ddc9c17e3483128ba --- /dev/null +++ b/drc33/qqp0kn15/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0000998400/model b/drc33/qqp0kn15/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..aecbe5775f8dac7b30776bff52b0cae56fbdf936 --- /dev/null +++ b/drc33/qqp0kn15/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8a5e93494ae19142087462083c5ef12c0b572ed71e3405cd9442da4516a818b +size 20566089 diff --git a/drc33/qqp0kn15/cp_0002001920/cfg.json b/drc33/qqp0kn15/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4a25cbf6124f6960976f56af32161031e6612ab9 --- /dev/null +++ b/drc33/qqp0kn15/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0002001920/model b/drc33/qqp0kn15/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..0b12fad00b4465e062c63c66529813418ce185fb --- /dev/null +++ b/drc33/qqp0kn15/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9008d73421b4fb5b58551e782dee09298187ae15927edb7992cf654f1f6048c +size 20566089 diff --git a/drc33/qqp0kn15/cp_0003000320/cfg.json b/drc33/qqp0kn15/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..996caa784b362a4af36c68b28ee630d1f3033fd9 --- /dev/null +++ b/drc33/qqp0kn15/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0003000320/model b/drc33/qqp0kn15/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..a2e73bf221f57de8ebd22639f28fe7b858a2e889 --- /dev/null +++ b/drc33/qqp0kn15/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61620ea369542a0f0ce55708a6f89ade24ba5b105e23e0b5f9c13b74b7ad19b6 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0004003840/cfg.json b/drc33/qqp0kn15/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cf2ecb2c872f7d8b3a008a048500a7dcc22a0823 --- /dev/null +++ b/drc33/qqp0kn15/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0004003840/model b/drc33/qqp0kn15/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..aae66d790c9cbdf4d35640a33bda65fa4698b5ec --- /dev/null +++ b/drc33/qqp0kn15/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6005053823c2c09d33c28711079d1b198cc55ae8e8335dd7ef92f8cdaeba4b3 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0005007360/cfg.json b/drc33/qqp0kn15/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b6a83ebb92f3ca8e020ed0df385c1e5409a886e0 --- /dev/null +++ b/drc33/qqp0kn15/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0005007360/model b/drc33/qqp0kn15/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..dd5be0a7122e3169dcd2b5cc6e20588808c5ac11 --- /dev/null +++ b/drc33/qqp0kn15/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:142f6431ec0c96121382a865d4a2a5c593f1100c4d8332a7f2140155fa2926e9 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0006005760/cfg.json b/drc33/qqp0kn15/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3aca7cbc607260d1ced05d31917b8a22b05cee09 --- /dev/null +++ b/drc33/qqp0kn15/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0006005760/model b/drc33/qqp0kn15/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..130dacedebd88ba2b9ac5f45ea2fe772a2e4056f --- /dev/null +++ b/drc33/qqp0kn15/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8549d1fd80debe746731c133e05807429ccf5c5cd5a95fdd7893ad88f96a5da4 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0007009280/cfg.json b/drc33/qqp0kn15/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..24e96c4f3f5f839304765e24d832ec5f7a7babba --- /dev/null +++ b/drc33/qqp0kn15/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0007009280/model b/drc33/qqp0kn15/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..c11e92e772273fc8f09b87b3352d692f97da6bf2 --- /dev/null +++ b/drc33/qqp0kn15/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d002b11ae062d156ec06ec5cbcfeb8aa561ce44bd356d040c04a63dcd01bf813 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0008007680/cfg.json b/drc33/qqp0kn15/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..239c8b20ebe7dc1415c5bc9fc8ffba213084e002 --- /dev/null +++ b/drc33/qqp0kn15/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0008007680/model b/drc33/qqp0kn15/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..a17c023c0b3f74ffdd7d1af97a2b3355c07826e1 --- /dev/null +++ b/drc33/qqp0kn15/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7df7437b5cbc28424b26bb9468aeba9f42215bf2dd8398687f9290d91fc239 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0009011200/cfg.json b/drc33/qqp0kn15/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9fb561fa9df4147ce35a7760aa1e619f6b91185e --- /dev/null +++ b/drc33/qqp0kn15/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0009011200/model b/drc33/qqp0kn15/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..77855b82f5f5b0aa425aa0895cb08ee67656a9fc --- /dev/null +++ b/drc33/qqp0kn15/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c203c0c29e6d769587e17b8029e046f75a588b00cf257ebaa32af91c4343901a +size 20566089 diff --git a/drc33/qqp0kn15/cp_0010014720/cfg.json b/drc33/qqp0kn15/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..aacc6c364dc3e0cb5ef03eb893773160fad1100c --- /dev/null +++ b/drc33/qqp0kn15/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0010014720/model b/drc33/qqp0kn15/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..d2fb42d7061f2ca5a0329637fe885ccf141db368 --- /dev/null +++ b/drc33/qqp0kn15/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f8c09b08538d4e7fc08ea6344629e9f105adea436a8138f2deaef4ed38c52b +size 20566089 diff --git a/drc33/qqp0kn15/cp_0011013120/cfg.json b/drc33/qqp0kn15/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9a0adfab133765eaa47c18b6ce20b4f9f2968cb8 --- /dev/null +++ b/drc33/qqp0kn15/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0011013120/model b/drc33/qqp0kn15/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..289c138a2eb8e1be159416b59a91e610ed517e86 --- /dev/null +++ b/drc33/qqp0kn15/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ef490e398b280ca3546b3a8d5c4fdf255db1eac527ef681cd8a36b7beca7ce3 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0012016640/cfg.json b/drc33/qqp0kn15/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f38b81ecf8d07fa4000e8e8a8eb3fb49e26c5b3f --- /dev/null +++ b/drc33/qqp0kn15/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0012016640/model b/drc33/qqp0kn15/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..0499756430659eb69838765bf3a04c0dc232e56a --- /dev/null +++ b/drc33/qqp0kn15/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac8dbcf6cc4280487ff460914267f3bac7559bcb04fd366a41200c877dfcf858 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0013015040/cfg.json b/drc33/qqp0kn15/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d19d9eff7009028109752dd62bbd105c6dd18f76 --- /dev/null +++ b/drc33/qqp0kn15/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0013015040/model b/drc33/qqp0kn15/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..7207ead059babd5ebd72d0baac3c7b388789dfa8 --- /dev/null +++ b/drc33/qqp0kn15/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dda23c91a276cf6fc3b16129f4efe78d98f4935ce47030f7c5f344e811e67aca +size 20566089 diff --git a/drc33/qqp0kn15/cp_0014018560/cfg.json b/drc33/qqp0kn15/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..323f7c5b1176c1bbf75ff94b96c504a8e2ded386 --- /dev/null +++ b/drc33/qqp0kn15/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0014018560/model b/drc33/qqp0kn15/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..847dbf296f07a88945f2390857300df21f1109ff --- /dev/null +++ b/drc33/qqp0kn15/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb6710eeffe5c19340ab04bde0e8a80d285ce7283da4eeab6a7d165290c0155 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0015022080/cfg.json b/drc33/qqp0kn15/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fc3c474d88f5628ed4d87c6d214f371a3ff313b8 --- /dev/null +++ b/drc33/qqp0kn15/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0015022080/model b/drc33/qqp0kn15/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..eb4b1076b6c1392c61f5e81586c8309386c1433e --- /dev/null +++ b/drc33/qqp0kn15/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982c464d7e869931f73e4c793b7cb882da4b0535a25aba0ea3ce1033e6895977 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0016020480/cfg.json b/drc33/qqp0kn15/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7384aaf7359d3447d758b119c33f652de0ef6630 --- /dev/null +++ b/drc33/qqp0kn15/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0016020480/model b/drc33/qqp0kn15/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..8250ff6f5b07c4b99fd056b0bfa1c15a76e5bed2 --- /dev/null +++ b/drc33/qqp0kn15/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70fb83c55a33a87bfe9205e6fa52dea5498bd1d560e4088fd3de9cc210a160ff +size 20566089 diff --git a/drc33/qqp0kn15/cp_0017024000/cfg.json b/drc33/qqp0kn15/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9b666a3685fb78c77a0193499b34211df2ecfc5c --- /dev/null +++ b/drc33/qqp0kn15/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0017024000/model b/drc33/qqp0kn15/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..1027bea361d19dac63bb2a6ed055f65d02c79bc8 --- /dev/null +++ b/drc33/qqp0kn15/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325e2778334093683ba0cf9030863ecc7c5fdc239a51716a88df0330480d5a78 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0018022400/cfg.json b/drc33/qqp0kn15/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e1feeb426bf16ed501bb8a16721a83d6f080aed5 --- /dev/null +++ b/drc33/qqp0kn15/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0018022400/model b/drc33/qqp0kn15/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..2ece967d0658e8d50edbeab4a928a4c0d465c787 --- /dev/null +++ b/drc33/qqp0kn15/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e1ed5d051f896f5261d0af74c8f757a2665a74bd670b9503ca737565aff365 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0019025920/cfg.json b/drc33/qqp0kn15/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0e7f678e19537a58df92566278783c92233a4e17 --- /dev/null +++ b/drc33/qqp0kn15/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0019025920/model b/drc33/qqp0kn15/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..15e0a7b6feb0e3843f7a2f8a6b8fbc45948951c5 --- /dev/null +++ b/drc33/qqp0kn15/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc17b5dda6b3bd3d6dc7ebfd2b89bb64b444ae13e21e38e1f880f13eea24dc3 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0020029440/cfg.json b/drc33/qqp0kn15/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2ba7517e3bf50e997634cd6382e4495212f75a62 --- /dev/null +++ b/drc33/qqp0kn15/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0020029440/model b/drc33/qqp0kn15/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..0758d0f3c8acc86791ef08d209a8b07c779ebcfe --- /dev/null +++ b/drc33/qqp0kn15/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc882dd7f0513bbcf23a651c5741a47873738cde9a738492930e22b008d921a4 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0030044160/cfg.json b/drc33/qqp0kn15/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d31b800252e4773dae6dc62abc741d046acb6c9b --- /dev/null +++ b/drc33/qqp0kn15/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0030044160/model b/drc33/qqp0kn15/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..36efc596789b611b1c756c493ce858bda056c5e4 --- /dev/null +++ b/drc33/qqp0kn15/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c767c390acec92151d10d5df0d01c9a2dc6c39e924ab55166b17b60049ba70 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0040058880/cfg.json b/drc33/qqp0kn15/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8a95b5fdd26ae66cfe1b2b2c1209d4992ac645c2 --- /dev/null +++ b/drc33/qqp0kn15/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0040058880/model b/drc33/qqp0kn15/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..1f0599bdd0995853bbb8b7e72c4b93839508592a --- /dev/null +++ b/drc33/qqp0kn15/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a0203b1187b4258450ef6cca0725576de383d3a5cf3fc499a65d2610ada090 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0050073600/cfg.json b/drc33/qqp0kn15/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..54b4aa3e857eac6eba49c75d3307c8d573c23f37 --- /dev/null +++ b/drc33/qqp0kn15/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0050073600/model b/drc33/qqp0kn15/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..f56f47e28427b85956336066d2d7ce65e795051d --- /dev/null +++ b/drc33/qqp0kn15/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdca4aa0dfe925bdbb2e2454376bca594228b84e6795645b4460be4f13deffd6 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0060088320/cfg.json b/drc33/qqp0kn15/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8a0593fe710b0afbc8e4a5df84ab1951779e2b54 --- /dev/null +++ b/drc33/qqp0kn15/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0060088320/model b/drc33/qqp0kn15/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..5c1fbe7758ebf20350e242fc953ecfb7ad19ef77 --- /dev/null +++ b/drc33/qqp0kn15/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88150820491a28944633afd741aad90f3fdc0c182a5d800a854ef07ae8a1e2e7 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0070103040/cfg.json b/drc33/qqp0kn15/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f5ac51866755e0c976bc3c6a0bf15b94af9d8b7d --- /dev/null +++ b/drc33/qqp0kn15/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0070103040/model b/drc33/qqp0kn15/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..853f9fe9717293f228ecbc7ce3579c755c6c7296 --- /dev/null +++ b/drc33/qqp0kn15/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e8dc6c6fd8d68482830c5965e03a5db8bfcea31c11e66855b63454b24fa9dd5 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0080117760/cfg.json b/drc33/qqp0kn15/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c29cf13cbdc75a346fdd0b1b789ea6a2ab81bed8 --- /dev/null +++ b/drc33/qqp0kn15/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0080117760/model b/drc33/qqp0kn15/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..241f988e3f7abde2dd10a7876336b3c94958dca9 --- /dev/null +++ b/drc33/qqp0kn15/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:086b4f5517c8923e30496f545b234987f9764169728556e3c47d099e7996d8c2 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0090132480/cfg.json b/drc33/qqp0kn15/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0643849a52c14517a7380168324b5564ed91af --- /dev/null +++ b/drc33/qqp0kn15/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0090132480/model b/drc33/qqp0kn15/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..3ddc7758d8a645b73ae0c43f0105c5777fba0ca9 --- /dev/null +++ b/drc33/qqp0kn15/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:424a35437ca9aa83f41643f409fade5d5b115c57cf4d65284733afea684d5194 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0100147200/cfg.json b/drc33/qqp0kn15/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..915a21dc75857de8c3773d1bd0584098f57de997 --- /dev/null +++ b/drc33/qqp0kn15/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0100147200/model b/drc33/qqp0kn15/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..f80aa7de19adcc518da249088e387d31e5dfc784 --- /dev/null +++ b/drc33/qqp0kn15/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2fa50f7d011ff8275e1984de21e37ab97446c8ec10b37089342bea63c30898b +size 20566089 diff --git a/drc33/qqp0kn15/cp_0110161920/cfg.json b/drc33/qqp0kn15/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c2be167ac036d726e8bd77fb61034096ac610692 --- /dev/null +++ b/drc33/qqp0kn15/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0110161920/model b/drc33/qqp0kn15/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..7f7b533798a5a0208bfb31c6dd57f9dafb728083 --- /dev/null +++ b/drc33/qqp0kn15/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031c6f45813b37dd27722ac0787c73654acc0a75cca7684d3475dac8eb34ac74 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0120176640/cfg.json b/drc33/qqp0kn15/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ab140e8e5270f9ad200e522fd685201c2260895e --- /dev/null +++ b/drc33/qqp0kn15/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0120176640/model b/drc33/qqp0kn15/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..a3e48184152c461afcf23a94b374fe48e6c7d9cf --- /dev/null +++ b/drc33/qqp0kn15/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b47d721a6a67f85cf3dfc2b1e67ca048763b819701914540193099ca7140ece +size 20566089 diff --git a/drc33/qqp0kn15/cp_0130191360/cfg.json b/drc33/qqp0kn15/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7096d931374176a7bb00f0849a0df43dcd28756f --- /dev/null +++ b/drc33/qqp0kn15/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0130191360/model b/drc33/qqp0kn15/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..376d6b02f052548c9aaa5afc2fc73913f2ce86a6 --- /dev/null +++ b/drc33/qqp0kn15/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbc54655afbdc0f8a8c44377bdf1834fea92e14ce7fe8073ad900f1b94113ae +size 20566089 diff --git a/drc33/qqp0kn15/cp_0140206080/cfg.json b/drc33/qqp0kn15/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bfbb1cadd49e2b5c92d910f91c8184a92107a864 --- /dev/null +++ b/drc33/qqp0kn15/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0140206080/model b/drc33/qqp0kn15/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..dab61d2e2f948e8d4b5d0a193d95f35823f68901 --- /dev/null +++ b/drc33/qqp0kn15/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b100962a718ae365e1e69b19bd9e30365d694df165f8084f0b58d3d6d3691ddf +size 20566089 diff --git a/drc33/qqp0kn15/cp_0150220800/cfg.json b/drc33/qqp0kn15/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea38c8618e090e130a833a7433ff8b6dabb7510 --- /dev/null +++ b/drc33/qqp0kn15/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0150220800/model b/drc33/qqp0kn15/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..1b3fca4f3225cc26e520925bd6cd294bc81ca16c --- /dev/null +++ b/drc33/qqp0kn15/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:077170843d589ca5d661bf9e396e9d86a87d3e8678c347b6c842186cc170cd0a +size 20566089 diff --git a/drc33/qqp0kn15/cp_0160235520/cfg.json b/drc33/qqp0kn15/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..03d18e66ca5b74d9b19dd849f78c280a15a43957 --- /dev/null +++ b/drc33/qqp0kn15/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0160235520/model b/drc33/qqp0kn15/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..d1126c6eeb024105241c3ac4dd8dceb538d4b45e --- /dev/null +++ b/drc33/qqp0kn15/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fddd70a79a78561a15b25372eb61142881fda3ba4127053a0f9a8a713a32591 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0170250240/cfg.json b/drc33/qqp0kn15/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3d72e2a5c25c7468e2771110096afef1b370752c --- /dev/null +++ b/drc33/qqp0kn15/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0170250240/model b/drc33/qqp0kn15/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..2299c6ad1fe1080e7eaf0c72de7ad5188f1a541c --- /dev/null +++ b/drc33/qqp0kn15/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0315a2d3065a7d2cbf926e867cb3aa65e8fec7cd260a042405f7195b396f6b16 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0180264960/cfg.json b/drc33/qqp0kn15/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ea830714808ec6c140d955f3b1abc1db2e78b73b --- /dev/null +++ b/drc33/qqp0kn15/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0180264960/model b/drc33/qqp0kn15/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..c147fb53e35273f2081aa3e5ad94e40bff44149d --- /dev/null +++ b/drc33/qqp0kn15/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ca2626f2b13119f99b97eed22b1f2a4cc3bda773260a137397aa458f244c0b +size 20566089 diff --git a/drc33/qqp0kn15/cp_0190279680/cfg.json b/drc33/qqp0kn15/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1a6c6f2d174623f6c85703d17d017c2551d55661 --- /dev/null +++ b/drc33/qqp0kn15/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0190279680/model b/drc33/qqp0kn15/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..7a4787c1c8109dfdd24a2af7e144c64f541ceca5 --- /dev/null +++ b/drc33/qqp0kn15/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771882aa874e09c9c0df5230605777d121d03f22c69712873e848ed99c19a0e0 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0200294400/cfg.json b/drc33/qqp0kn15/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..39f8bf7a5fc1ab3d6b3309568a9e51a7196d1607 --- /dev/null +++ b/drc33/qqp0kn15/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0200294400/model b/drc33/qqp0kn15/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..f17b13affbeb167269bc9397579ae3f72f1d8d75 --- /dev/null +++ b/drc33/qqp0kn15/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd7a52da9b60d6cb41ad346228348a010756af8477183424de47f97dfbd2e24 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0300441600/cfg.json b/drc33/qqp0kn15/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7cef8331666068c390d364f11460c8f0632f883c --- /dev/null +++ b/drc33/qqp0kn15/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0300441600/model b/drc33/qqp0kn15/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..0edd12a141cc3a43be524eef1859952c859e8a8e --- /dev/null +++ b/drc33/qqp0kn15/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e76e0c539959e542523c7ace3d31efbf7d96f4c2c7990206879175d3262577 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0400588800/cfg.json b/drc33/qqp0kn15/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3bdebe6e54118b51b7117c93eec1d276e7057362 --- /dev/null +++ b/drc33/qqp0kn15/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0400588800/model b/drc33/qqp0kn15/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..9a58a041a40b48cc7efc0c82bef52c8a518511e2 --- /dev/null +++ b/drc33/qqp0kn15/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ff0f1449e33f2bf41b9a8450583042ae0ce9156a5d0270ce35cbf7638e7859 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0500736000/cfg.json b/drc33/qqp0kn15/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6426aaf86c0fdc02ff46fcd5b0076ece7dc1147e --- /dev/null +++ b/drc33/qqp0kn15/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0500736000/model b/drc33/qqp0kn15/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..0b29be6768bdfe1172a33def38f142ab79087beb --- /dev/null +++ b/drc33/qqp0kn15/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb384434984717862f0d06d4d6f89f2cc77991d8f6e05bd29465c5ea578eb766 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0600883200/cfg.json b/drc33/qqp0kn15/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..746987fb500f09a2c828034d6808b08f10806a4c --- /dev/null +++ b/drc33/qqp0kn15/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0600883200/model b/drc33/qqp0kn15/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..844364501bd43883431bc6bc96a1b392373b3e85 --- /dev/null +++ b/drc33/qqp0kn15/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843037fcdd7095d5ede88cd66f60001d084edda52693d7f58f6cce34e2460973 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0701030400/cfg.json b/drc33/qqp0kn15/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..405c72c274419373844a6ba8480f7eac88f8cd7f --- /dev/null +++ b/drc33/qqp0kn15/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0701030400/model b/drc33/qqp0kn15/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..e16b3c824b0a434a7c2f226272cc38b91a750bd6 --- /dev/null +++ b/drc33/qqp0kn15/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dedc25aed0084d371561a35f366a1c4175d6075479e77cc2760f76be164c4f06 +size 20566089 diff --git a/drc33/qqp0kn15/cp_0801177600/cfg.json b/drc33/qqp0kn15/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3a1516eec36154d8b4a965a84e223ef6d034a253 --- /dev/null +++ b/drc33/qqp0kn15/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0801177600/model b/drc33/qqp0kn15/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..76766833849b30f15180eee2d510cf2c42037202 --- /dev/null +++ b/drc33/qqp0kn15/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e16dfb6262cd2aefd2da5ea14fe3240650c96e98e321d928fb3f81fe313052a +size 20566089 diff --git a/drc33/qqp0kn15/cp_0901324800/cfg.json b/drc33/qqp0kn15/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7fa6488bf1b18627914e207426a6a77af44ee130 --- /dev/null +++ b/drc33/qqp0kn15/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_0901324800/model b/drc33/qqp0kn15/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..d5c1586c97f3d8eb63f721102a034c3d79b7ea8a --- /dev/null +++ b/drc33/qqp0kn15/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa8ea6cc64e8fc61979cecf705c21428f8589702ddb6c358affdb7a979294a86 +size 20566089 diff --git a/drc33/qqp0kn15/cp_1001472000/cfg.json b/drc33/qqp0kn15/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..155069d458d2ce801106b96e5618c7dd582af5a9 --- /dev/null +++ b/drc33/qqp0kn15/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1001472000/model b/drc33/qqp0kn15/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..9526295babc4f4263428a33975070ef092ce3eec --- /dev/null +++ b/drc33/qqp0kn15/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15350013dd74f33131a9227c18eb8a2f95a0dd90cd1548696bb8d9ba62fd50f +size 20566089 diff --git a/drc33/qqp0kn15/cp_1101619200/cfg.json b/drc33/qqp0kn15/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..aae2a4a40560da078574bc9ce4220d1b523e72c4 --- /dev/null +++ b/drc33/qqp0kn15/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1101619200/model b/drc33/qqp0kn15/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..5f6dd95fc0da8b94f3802e9bfd68eb44db0b0301 --- /dev/null +++ b/drc33/qqp0kn15/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5067c8289a42dbebc6c9cf0d138e805905ea446525f342c0c2610d866d8d8750 +size 20566089 diff --git a/drc33/qqp0kn15/cp_1201766400/cfg.json b/drc33/qqp0kn15/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..00fd09f555391b87c11485b81e9fef3d302eeacd --- /dev/null +++ b/drc33/qqp0kn15/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1201766400/model b/drc33/qqp0kn15/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..79798241e4c2a088037a2d8f368895be89de4af3 --- /dev/null +++ b/drc33/qqp0kn15/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a835cb150d86aa95ba60bb1aed8969fa5d23e18854db74b4a917159448fac7 +size 20566089 diff --git a/drc33/qqp0kn15/cp_1301913600/cfg.json b/drc33/qqp0kn15/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a3ec217cd73f1973f4bf854f70fda8c57e217d5f --- /dev/null +++ b/drc33/qqp0kn15/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1301913600/model b/drc33/qqp0kn15/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..6015c0a8805683372c08079e4eaec064c991e384 --- /dev/null +++ b/drc33/qqp0kn15/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799696f5474c66208a28629781bbc83b1197bc88893871a38fd65d3693f9a26b +size 20566089 diff --git a/drc33/qqp0kn15/cp_1402060800/cfg.json b/drc33/qqp0kn15/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e4788b9e5ade0f4b2492ec64c443ff7d2fd02dad --- /dev/null +++ b/drc33/qqp0kn15/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1402060800/model b/drc33/qqp0kn15/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..5f54ab678b8fa9468513ba7f7d495cf63c4063be --- /dev/null +++ b/drc33/qqp0kn15/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2e4d35af8db7159012f1aeb57a258361d319fc85b57dfee4c334bdb530248a4 +size 20566089 diff --git a/drc33/qqp0kn15/cp_1502208000/cfg.json b/drc33/qqp0kn15/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..95d61c735a26961f195f38ff0021f2ea4d7b4132 --- /dev/null +++ b/drc33/qqp0kn15/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1502208000/model b/drc33/qqp0kn15/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..28a9d4f9f9958daf0b589ae47b392ba2b711e778 --- /dev/null +++ b/drc33/qqp0kn15/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d3525b3911a52c1a6830b6335e6bc79a2d2ed2de151f55f215d35b797c7df27 +size 20566089 diff --git a/drc33/qqp0kn15/cp_1602355200/cfg.json b/drc33/qqp0kn15/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cb7a208f011172083c3fd9f21a915629a2fdaf2e --- /dev/null +++ b/drc33/qqp0kn15/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1602355200/model b/drc33/qqp0kn15/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..645091c643a73184053dc7fd50d2666ad9511947 --- /dev/null +++ b/drc33/qqp0kn15/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31bda37e16cf07dfcaa56f4cc54526be51c5521a4f6ac85031fc4eddf7814e5 +size 20566089 diff --git a/drc33/qqp0kn15/cp_1702502400/cfg.json b/drc33/qqp0kn15/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3ba2aa9fad719ab673a904f75fd2ef708ea78f08 --- /dev/null +++ b/drc33/qqp0kn15/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1702502400/model b/drc33/qqp0kn15/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..e79967ffa177f5b3f877c4bd6f1a594af085683f --- /dev/null +++ b/drc33/qqp0kn15/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5af1730e316e19124bbcf10cb5e56308bca440d7fddd1c83a7c52685ab4d71b6 +size 20566089 diff --git a/drc33/qqp0kn15/cp_1802649600/cfg.json b/drc33/qqp0kn15/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f4fa953fe789a62a8462bf2dafdd172fcec4d7eb --- /dev/null +++ b/drc33/qqp0kn15/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1802649600/model b/drc33/qqp0kn15/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..7e6634f7805fe23ddb26a8a8b12e42c647c0442e --- /dev/null +++ b/drc33/qqp0kn15/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64c7980bc83828f3e02625e08082e8c7094a99ac7cda35e2a74542f9cf2b588 +size 20566089 diff --git a/drc33/qqp0kn15/cp_1902796800/cfg.json b/drc33/qqp0kn15/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5af122b285a7778cb69514c3e9870d7d838cca7f --- /dev/null +++ b/drc33/qqp0kn15/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_1902796800/model b/drc33/qqp0kn15/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..5a29875246a07237d13eb84f8686a420bdaf4292 --- /dev/null +++ b/drc33/qqp0kn15/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882e98a15f305174fa32ea236057adcad6ff2fae1b1366fea168e79a74321975 +size 20566089 diff --git a/drc33/qqp0kn15/cp_2002944000/cfg.json b/drc33/qqp0kn15/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ab60cc71a77a3fde634f1f317acd178e44aa8dd1 --- /dev/null +++ b/drc33/qqp0kn15/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.convlstm:ConvLSTMConfig", "embed": [{"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, {"features": 32, "kernel_size": [4, 4], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}], "recurrent": {"conv": {"features": 32, "kernel_size": [3, 3], "strides": [1, 1], "padding": "SAME", "use_bias": true, "initialization": "lecun"}, "pool_and_inject": "horizontal", "pool_projection": "per-channel", "output_activation": "tanh", "forget_bias": 0.0, "fence_pad": "valid"}, "use_relu": false, "n_recurrent": 3, "repeats_per_step": 3, "mlp_hiddens": [256], "skip_final": true, "residual": false, "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/drc33/qqp0kn15/cp_2002944000/model b/drc33/qqp0kn15/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..12d3a7c701bc693061d2675f0a0bd3e63b2b7586 --- /dev/null +++ b/drc33/qqp0kn15/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d63496f0a0c634d9eeee430b77c1e77e4509b5f1bff212e20cf1277ddf64ec08 +size 20566089 diff --git a/resnet/13qckf6e/cp_0000998400/cfg.json b/resnet/13qckf6e/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cb93ac3ce1e388ba8a36084a9177afe48bf4532b --- /dev/null +++ b/resnet/13qckf6e/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0000998400/model b/resnet/13qckf6e/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..c2888f967b686325aab91cf608bb727020f1aeb7 --- /dev/null +++ b/resnet/13qckf6e/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a24e748a3b3a825919a0a6511136d222b65fc6b6993c01c1f29026486dcf4779 +size 49105345 diff --git a/resnet/13qckf6e/cp_0002001920/cfg.json b/resnet/13qckf6e/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2b41226e089fe25386dfd1b8e1bc29968e2b5e32 --- /dev/null +++ b/resnet/13qckf6e/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0002001920/model b/resnet/13qckf6e/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..a5657887ca6d7fa76b2afcfe03623d205e7a468e --- /dev/null +++ b/resnet/13qckf6e/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a400eeb11848e45ee72ed34f62c4eb2f78958b804b693f652b1679e33fe0427 +size 49105345 diff --git a/resnet/13qckf6e/cp_0003000320/cfg.json b/resnet/13qckf6e/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..15db1f7a889d9c702f3675339edd861fff6137f4 --- /dev/null +++ b/resnet/13qckf6e/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0003000320/model b/resnet/13qckf6e/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..00a128aabc57999146d1b94bc39f45f320a36052 --- /dev/null +++ b/resnet/13qckf6e/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6a573455449a73a981c1e64f2a99f36dbd235a48b95e875733fadbac879ece2 +size 49105345 diff --git a/resnet/13qckf6e/cp_0004003840/cfg.json b/resnet/13qckf6e/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..131fa9425daa136ad0b97ff1bfe2b764efd6fd07 --- /dev/null +++ b/resnet/13qckf6e/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0004003840/model b/resnet/13qckf6e/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..8ad08d5f684d3fd4578bdbd54f2ae6367ccc29f1 --- /dev/null +++ b/resnet/13qckf6e/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611abc5ba1dd99c6e3ba2b9c5cad93ea8dfc6a5fd14a0f419d1e64782ab15d7c +size 49105345 diff --git a/resnet/13qckf6e/cp_0005007360/cfg.json b/resnet/13qckf6e/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e587d18a684095052935ba460fe23442f6c1c41c --- /dev/null +++ b/resnet/13qckf6e/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0005007360/model b/resnet/13qckf6e/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..cb03dc392dadf39dbce1f7b722b5302f82fa8ac7 --- /dev/null +++ b/resnet/13qckf6e/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70edfb3002d42a2a29a2599257096b84c71a58d295150d55a904f4d0ff1c189 +size 49105345 diff --git a/resnet/13qckf6e/cp_0006005760/cfg.json b/resnet/13qckf6e/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..92b07b1c53a7aea4e92c64eb433c49de2015481d --- /dev/null +++ b/resnet/13qckf6e/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0006005760/model b/resnet/13qckf6e/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..fb70f1156d26fc875a05eca2ce7ce2ea41cb78dc --- /dev/null +++ b/resnet/13qckf6e/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80f6ece2c3fb05e2f761db7c2de881133cb6e45865208103807cd810b6ec43a +size 49105345 diff --git a/resnet/13qckf6e/cp_0007009280/cfg.json b/resnet/13qckf6e/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..95dea23d39048f6ca40987b19e202062cabfc3c0 --- /dev/null +++ b/resnet/13qckf6e/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0007009280/model b/resnet/13qckf6e/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..fc68a2aeabc8049b7548bbea7b27d1407bbcbd7f --- /dev/null +++ b/resnet/13qckf6e/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:376201929a21a903b61d8609f6629b0c4364564caae0f4343d09e74aada9881d +size 49105345 diff --git a/resnet/13qckf6e/cp_0008007680/cfg.json b/resnet/13qckf6e/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..863af3c73d87c82a4567d7cd103b2f83608c0b22 --- /dev/null +++ b/resnet/13qckf6e/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0008007680/model b/resnet/13qckf6e/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..f751e47b3fcb01940314ea654e0a558226c05b17 --- /dev/null +++ b/resnet/13qckf6e/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb892336f5589fa68569c72715b829b3d8cac2d8de309f22e1b3e63c3c5207d1 +size 49105345 diff --git a/resnet/13qckf6e/cp_0009011200/cfg.json b/resnet/13qckf6e/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4e2bfe0cbafee3c54e961b2e750af443dae67744 --- /dev/null +++ b/resnet/13qckf6e/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0009011200/model b/resnet/13qckf6e/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..bfd54edca44df1a8f52eb963440dfe914ff05885 --- /dev/null +++ b/resnet/13qckf6e/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad651a6234098bf648eb6258136a1ad2d0ec847135c0095f77509d59332d42e +size 49105345 diff --git a/resnet/13qckf6e/cp_0010014720/cfg.json b/resnet/13qckf6e/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b601a6fb027907d6d76a9fc34e7ecafc0cfd75aa --- /dev/null +++ b/resnet/13qckf6e/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0010014720/model b/resnet/13qckf6e/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..a2862090f4be30308cb4906b6ae31919cf4b8a1f --- /dev/null +++ b/resnet/13qckf6e/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8caa71333d891a03443da0fcdd681220cd5a4da2c4e35641b2af693b0b70d0ae +size 49105345 diff --git a/resnet/13qckf6e/cp_0011013120/cfg.json b/resnet/13qckf6e/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c19161e9e011b207aa43d91c1bd398fb4dcb1288 --- /dev/null +++ b/resnet/13qckf6e/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0011013120/model b/resnet/13qckf6e/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..011750137211c83e0c350a020b4fc92eb39117f7 --- /dev/null +++ b/resnet/13qckf6e/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff9fb6eca12a2b4da6417336fb7c3b350c8e18bb115dca7165caa1a872c9497 +size 49105345 diff --git a/resnet/13qckf6e/cp_0012016640/cfg.json b/resnet/13qckf6e/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e1cd91c353f68aa080d7ff0c46d0ccebed6902e6 --- /dev/null +++ b/resnet/13qckf6e/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0012016640/model b/resnet/13qckf6e/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..0068c1311f91764739195ecfead393536046d738 --- /dev/null +++ b/resnet/13qckf6e/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdfb6f557b23eafad7dd892ad3475e05ca9c64f507ae25b68e5ccada0cbd8e1b +size 49105345 diff --git a/resnet/13qckf6e/cp_0013015040/cfg.json b/resnet/13qckf6e/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3613012e84e0e8bf0cac4da11f22121481b1af74 --- /dev/null +++ b/resnet/13qckf6e/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0013015040/model b/resnet/13qckf6e/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..48d34fe41379e00d946162effc64c739b545a6ff --- /dev/null +++ b/resnet/13qckf6e/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48f6b6939178947a57172be7dc562565ed0622d424c517674d4bfc738bdefb4 +size 49105345 diff --git a/resnet/13qckf6e/cp_0014018560/cfg.json b/resnet/13qckf6e/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a881014b48adbc5093a1d0481fd952a43fda21ae --- /dev/null +++ b/resnet/13qckf6e/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0014018560/model b/resnet/13qckf6e/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..16e18f0513dbab76a25fed8693d6d21e9e703325 --- /dev/null +++ b/resnet/13qckf6e/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9639cbe6fc9409a539abb45247e81732c4bcb5dbb624c44cb8782a348f05a37 +size 49105345 diff --git a/resnet/13qckf6e/cp_0015022080/cfg.json b/resnet/13qckf6e/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ae656aaba0c7fb03d9a80f8d0b6e5b96f03cbfde --- /dev/null +++ b/resnet/13qckf6e/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0015022080/model b/resnet/13qckf6e/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..b12973af1043e7fd1236b413e7dbcbcd8a151845 --- /dev/null +++ b/resnet/13qckf6e/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95d0d28e9149451443aa8a9967399854809b5261a4e1a6867e0d109bb44d75a7 +size 49105345 diff --git a/resnet/13qckf6e/cp_0016020480/cfg.json b/resnet/13qckf6e/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4bfee9db863e793e6d765b128df25ed50f823c57 --- /dev/null +++ b/resnet/13qckf6e/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0016020480/model b/resnet/13qckf6e/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..a6d2651da7a288edad7de60cbc56be8a166c3f88 --- /dev/null +++ b/resnet/13qckf6e/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:041341795bb333f264dcc5dca1a74eab5f3dc1190dc0d6e3f9d0bc5b2f912db9 +size 49105345 diff --git a/resnet/13qckf6e/cp_0017024000/cfg.json b/resnet/13qckf6e/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2094ef4a83aac48b9f8f1b699bdeebd29c19700c --- /dev/null +++ b/resnet/13qckf6e/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0017024000/model b/resnet/13qckf6e/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..7c9d462dd6dc0178cf6c5aa2d9ade611589411d4 --- /dev/null +++ b/resnet/13qckf6e/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ec9838977ed45357ae53397d69a24db277952576b7aae320844d71330f14d4 +size 49105345 diff --git a/resnet/13qckf6e/cp_0018022400/cfg.json b/resnet/13qckf6e/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..78342b60b4034bb5b71c7025745d81cc5830080d --- /dev/null +++ b/resnet/13qckf6e/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0018022400/model b/resnet/13qckf6e/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..2dee95ba71c10b1fe994f2573fa686b426ed6734 --- /dev/null +++ b/resnet/13qckf6e/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e29674455635bae5b597f7c31148fa502909e79b48df245e8294a3b665ee61 +size 49105345 diff --git a/resnet/13qckf6e/cp_0019025920/cfg.json b/resnet/13qckf6e/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..22ef5df803323ca5df87525d9fca08d28eda06dd --- /dev/null +++ b/resnet/13qckf6e/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0019025920/model b/resnet/13qckf6e/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..c4b87a1b75e688237bad65aa2a57a5ae60176bc2 --- /dev/null +++ b/resnet/13qckf6e/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c651fbd7b77adb98bffe511cd6d82ecff991c65339856757f1bf820853779b24 +size 49105345 diff --git a/resnet/13qckf6e/cp_0020029440/cfg.json b/resnet/13qckf6e/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b2454fb89e68ff6106586052f03792639f6d3a61 --- /dev/null +++ b/resnet/13qckf6e/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0020029440/model b/resnet/13qckf6e/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..6a244a5a1d08e28331f66f0fac26dcd44e5aca2c --- /dev/null +++ b/resnet/13qckf6e/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a0bf716c489f09c4220e7f3b7a7fea393d65fb7c23c5e29cb80a45572368103 +size 49105345 diff --git a/resnet/13qckf6e/cp_0030044160/cfg.json b/resnet/13qckf6e/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..58e757c813f943930e7da2858bfbb7720d60923d --- /dev/null +++ b/resnet/13qckf6e/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0030044160/model b/resnet/13qckf6e/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..f7241ff7dae3f2e7e839d912b2f4432f4c6e83f2 --- /dev/null +++ b/resnet/13qckf6e/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:089a029ece5c70c06452f8c2854a79e8c1bf8ec55cf57e436e3bfc2d1f139aad +size 49105345 diff --git a/resnet/13qckf6e/cp_0040058880/cfg.json b/resnet/13qckf6e/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..71588cc15e67ca0e5f1fe3b3a76c52f053f6e180 --- /dev/null +++ b/resnet/13qckf6e/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0040058880/model b/resnet/13qckf6e/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..685a4f2d4b44892aa7e5250306594b624cc79d5d --- /dev/null +++ b/resnet/13qckf6e/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc03099582ca53e8eb80c6315af549a13b11ea35d14fad2ff72def8a768db22 +size 49105345 diff --git a/resnet/13qckf6e/cp_0050073600/cfg.json b/resnet/13qckf6e/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..32022612215246f7c9519b0a9bad93361a47f72b --- /dev/null +++ b/resnet/13qckf6e/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0050073600/model b/resnet/13qckf6e/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..08bb12e7f48519a2607a341e3eb4bb491bc71944 --- /dev/null +++ b/resnet/13qckf6e/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466d7d06c81b4daa5011192c1b0a725d68de6b824283333724f9020aff8312d2 +size 49105345 diff --git a/resnet/13qckf6e/cp_0060088320/cfg.json b/resnet/13qckf6e/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3c082c593cbaa7f6673c4da1187a565a2c878140 --- /dev/null +++ b/resnet/13qckf6e/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0060088320/model b/resnet/13qckf6e/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..4d0d2a03090a7454fee3e9be50ccdab53c4a5c27 --- /dev/null +++ b/resnet/13qckf6e/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d2af65920c2dc96625a99a9b8c255cfec89120a9f6a7d2b07e8d5d0f352cc26 +size 49105345 diff --git a/resnet/13qckf6e/cp_0070103040/cfg.json b/resnet/13qckf6e/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a41c6d96ad71a116a181b56dc51858e7d0fb8853 --- /dev/null +++ b/resnet/13qckf6e/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0070103040/model b/resnet/13qckf6e/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..42dbf1570727d7475692c69ba6b6a9232c8012d5 --- /dev/null +++ b/resnet/13qckf6e/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b12665d869d4609ea3c951fb107e6686c5c79415c952a41ade611353910143 +size 49105345 diff --git a/resnet/13qckf6e/cp_0080117760/cfg.json b/resnet/13qckf6e/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..acf914c33337ed389cac4da23dc76895d2982038 --- /dev/null +++ b/resnet/13qckf6e/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0080117760/model b/resnet/13qckf6e/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..1416848c4618c236a9f5c742ce29ac8a01f62571 --- /dev/null +++ b/resnet/13qckf6e/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc67cc473f9920247d81385753e8a75ba8d0c5ce2da3b85acab9f172aba119d +size 49105345 diff --git a/resnet/13qckf6e/cp_0090132480/cfg.json b/resnet/13qckf6e/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bf98ca55669d73d7c726e1416707c2df608bcd31 --- /dev/null +++ b/resnet/13qckf6e/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0090132480/model b/resnet/13qckf6e/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..177b6b274402df366519da76dd3b376b9fcd7d90 --- /dev/null +++ b/resnet/13qckf6e/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b077cdc216a67fccc6cab262e733ff357a71c5dab927cb225daeaf156217b089 +size 49105345 diff --git a/resnet/13qckf6e/cp_0100147200/cfg.json b/resnet/13qckf6e/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2dc731091e78904cc62c89eb6e95aa71c59bd81f --- /dev/null +++ b/resnet/13qckf6e/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0100147200/model b/resnet/13qckf6e/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..a172c5a02b808bdd690d249260fdee424afe09b5 --- /dev/null +++ b/resnet/13qckf6e/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9693b0ea5e9aceb085ec92ff5c3fd37f61bd07a8a3901b94132384241a44f51 +size 49105345 diff --git a/resnet/13qckf6e/cp_0110161920/cfg.json b/resnet/13qckf6e/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..636685cb2ac343fcfb195275109b952903a9e9fd --- /dev/null +++ b/resnet/13qckf6e/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0110161920/model b/resnet/13qckf6e/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..e80344bff3a5661cf7a0e5f7e919de86a1627850 --- /dev/null +++ b/resnet/13qckf6e/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fd005a3e2c8064f69adf6da3f9f9bd3d4a1ed4dfc6c2d1da780f868b17e5d49 +size 49105345 diff --git a/resnet/13qckf6e/cp_0120176640/cfg.json b/resnet/13qckf6e/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bfe1a8cc53b19e52397228618fcd2aab9eacf17f --- /dev/null +++ b/resnet/13qckf6e/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0120176640/model b/resnet/13qckf6e/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..8b8b636f6867ee52df854c6eced3d8f0958b6db1 --- /dev/null +++ b/resnet/13qckf6e/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9791d3824c10187c4b676e0fd90bc59227ea5a70cd60e8002f0b5eb22b7d05 +size 49105345 diff --git a/resnet/13qckf6e/cp_0130191360/cfg.json b/resnet/13qckf6e/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..67886b2332c91703667b74e8f2bee7ec4e1c47c3 --- /dev/null +++ b/resnet/13qckf6e/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0130191360/model b/resnet/13qckf6e/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..ea238e12d4481a167020e24bd3b4526e2facedba --- /dev/null +++ b/resnet/13qckf6e/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59ecd8cd5774dac72172a633c02c91ec5f424260b75cc47b624bbeebe269f6ff +size 49105345 diff --git a/resnet/13qckf6e/cp_0140206080/cfg.json b/resnet/13qckf6e/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4dc4f37e6c75a05f4d89d74800fb6897865cd1a2 --- /dev/null +++ b/resnet/13qckf6e/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0140206080/model b/resnet/13qckf6e/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..9b3e493276ca76b0ac4ec2544b3ddb74a9cb962c --- /dev/null +++ b/resnet/13qckf6e/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaacc50834ff607c0f3cc812f89b0ac4c941bc483fd60a6417e574492b41019c +size 49105345 diff --git a/resnet/13qckf6e/cp_0150220800/cfg.json b/resnet/13qckf6e/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fd0914ed8811d343a6edd5da84f843aa062222f6 --- /dev/null +++ b/resnet/13qckf6e/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0150220800/model b/resnet/13qckf6e/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..7ece423227932ccc0003e36af8a28c93a2c69eb9 --- /dev/null +++ b/resnet/13qckf6e/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999077606393ace49f0358e052cd2d69aaadac633f14d1dfba0658659c44f00f +size 49105345 diff --git a/resnet/13qckf6e/cp_0160235520/cfg.json b/resnet/13qckf6e/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..184574db4e12d0a1cc878750add79c1940803013 --- /dev/null +++ b/resnet/13qckf6e/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0160235520/model b/resnet/13qckf6e/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..2eab4054ae59ca70e6a1464d4580e4b188f11abd --- /dev/null +++ b/resnet/13qckf6e/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669b1eeb541435d967e02e97ac8223173e0bc3091d98e59210d3d439e6bfced3 +size 49105345 diff --git a/resnet/13qckf6e/cp_0170250240/cfg.json b/resnet/13qckf6e/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6615da92edd944b50409a12863b02978755803cf --- /dev/null +++ b/resnet/13qckf6e/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0170250240/model b/resnet/13qckf6e/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..fe78b321b266dec28e9db0f1817acd13ea362c1a --- /dev/null +++ b/resnet/13qckf6e/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a6609f98bf2b44040cdd771d530b008f240e67b163f296c56389401e20a88f +size 49105345 diff --git a/resnet/13qckf6e/cp_0180264960/cfg.json b/resnet/13qckf6e/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fc0d74669a6587c22e6bf0d419510611f942c1ba --- /dev/null +++ b/resnet/13qckf6e/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0180264960/model b/resnet/13qckf6e/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..b3df9837a582044b4fdf0005d7d82edebe3c40d4 --- /dev/null +++ b/resnet/13qckf6e/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b8d2cbf762fa8a42f97f0ffce49c48b850ed0c94d2cbbc8bdabadd6cd3d927 +size 49105345 diff --git a/resnet/13qckf6e/cp_0190279680/cfg.json b/resnet/13qckf6e/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0fad8309bcde548e9364666c92832c05d759303d --- /dev/null +++ b/resnet/13qckf6e/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0190279680/model b/resnet/13qckf6e/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..5fcaade5e8cdd5e88837b61ad37996d8269072ef --- /dev/null +++ b/resnet/13qckf6e/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef9209691a84d5941e7233056df510f7ba013ddaf6e1f303098179e3046d004 +size 49105345 diff --git a/resnet/13qckf6e/cp_0200294400/cfg.json b/resnet/13qckf6e/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..07ef118b8031893563c2dd0df8e0e1d62a5ee059 --- /dev/null +++ b/resnet/13qckf6e/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0200294400/model b/resnet/13qckf6e/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..eb5edfae106a22af48e37d7cdc77be5629b7347a --- /dev/null +++ b/resnet/13qckf6e/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb4715fc3ad03fec9612ba30165ba6dc12aa452da1b078c6501f8fda7e3a95f +size 49105345 diff --git a/resnet/13qckf6e/cp_0300441600/cfg.json b/resnet/13qckf6e/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dd15c324d3001e0787a0ed1f571579835549f91c --- /dev/null +++ b/resnet/13qckf6e/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0300441600/model b/resnet/13qckf6e/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..aa31357aae35c447e8f31b17110019c837c4ea53 --- /dev/null +++ b/resnet/13qckf6e/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e9c2029fce7dbd9aacc79aba7311c549ae52a218700f0a3ac920c2e6666bb2f +size 49105345 diff --git a/resnet/13qckf6e/cp_0400588800/cfg.json b/resnet/13qckf6e/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..99f1a65513392852b471adb4b0262e44fd6a0f8f --- /dev/null +++ b/resnet/13qckf6e/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0400588800/model b/resnet/13qckf6e/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..a14cdfee89d859f370571d8b5aebba906a8e7175 --- /dev/null +++ b/resnet/13qckf6e/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39d9a1052eed402fa6db15c4d190aa5a9646cd89b2fcec0a77a0deef018badca +size 49105345 diff --git a/resnet/13qckf6e/cp_0500736000/cfg.json b/resnet/13qckf6e/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cb8b3d6eac568f4e85e1a54d97a4cb646dd1e76a --- /dev/null +++ b/resnet/13qckf6e/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0500736000/model b/resnet/13qckf6e/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..e546dc3c635a38f130e63e722bb58eb21a592d89 --- /dev/null +++ b/resnet/13qckf6e/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9582aa632f1d387a01474dab5bf518ff7e023d744df320f7bb7dc1b020655d2d +size 49105345 diff --git a/resnet/13qckf6e/cp_0600883200/cfg.json b/resnet/13qckf6e/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cab8c3db3a676c8b8f37bbc15f7aba8901a80b50 --- /dev/null +++ b/resnet/13qckf6e/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0600883200/model b/resnet/13qckf6e/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..f4c8968b63408b97b6ee51a56bfbe3ae7b07a9c1 --- /dev/null +++ b/resnet/13qckf6e/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d6a53bf5e6d448addc663ff02ebd1bdce3bdaf3b8706e3ab31a6d71dfa31d20 +size 49105345 diff --git a/resnet/13qckf6e/cp_0701030400/cfg.json b/resnet/13qckf6e/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a377fa2048a671ff2d63c612b185c023fefcbcac --- /dev/null +++ b/resnet/13qckf6e/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0701030400/model b/resnet/13qckf6e/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..0662aa90c658f7c4a954009feb9990f867c4a598 --- /dev/null +++ b/resnet/13qckf6e/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d790986f16fb4c5c61d5aa2d910fa65ee9d8a4cd3ce9696fb3bf23d468f37723 +size 49105345 diff --git a/resnet/13qckf6e/cp_0801177600/cfg.json b/resnet/13qckf6e/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..71ad3179c25fbc9d8a4d3552a216b39afc61baac --- /dev/null +++ b/resnet/13qckf6e/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0801177600/model b/resnet/13qckf6e/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..3a03a70df77de4841a050b07d56e1146ce0ab1eb --- /dev/null +++ b/resnet/13qckf6e/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c642c2b87743eea1b0e0bcfbf1859d6bddfbd1f05a0c229cf7e1b7b8a46dec8 +size 49105345 diff --git a/resnet/13qckf6e/cp_0901324800/cfg.json b/resnet/13qckf6e/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..67c8c226df90ed92913c306f533bb20aac4044e0 --- /dev/null +++ b/resnet/13qckf6e/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_0901324800/model b/resnet/13qckf6e/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..406e45bb43541e4ab46fb035a8e89df9b201547b --- /dev/null +++ b/resnet/13qckf6e/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd1eb486196ccc426de7ae1a8b21f3ee47e6e3b79b8aaaf4ace0a6393f269fea +size 49105345 diff --git a/resnet/13qckf6e/cp_1001472000/cfg.json b/resnet/13qckf6e/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e003bb2f31effa3a74267e3f375c101e3ceb7857 --- /dev/null +++ b/resnet/13qckf6e/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1001472000/model b/resnet/13qckf6e/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..46d14e3bf6d3af655490a6da51a419a132698d7a --- /dev/null +++ b/resnet/13qckf6e/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e15d1fd3590d5c8494aa380f54f7670b79c690cd5769b243a125a6f79f025ae +size 49105345 diff --git a/resnet/13qckf6e/cp_1101619200/cfg.json b/resnet/13qckf6e/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..797d63f978b3e19463af2402adc713866903fc75 --- /dev/null +++ b/resnet/13qckf6e/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1101619200/model b/resnet/13qckf6e/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..a15d8abf1dd9e52c5501c60ca852fd446505b52c --- /dev/null +++ b/resnet/13qckf6e/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f77d5f6236ec44abfa990393f69a78f2b14e04a62ec520a08e4f255aa19ff291 +size 49105345 diff --git a/resnet/13qckf6e/cp_1201766400/cfg.json b/resnet/13qckf6e/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9fd2e64f9100399a25339207e54c64ec9ea9eb81 --- /dev/null +++ b/resnet/13qckf6e/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1201766400/model b/resnet/13qckf6e/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..7f1dfa8cd4b3b0c6b7ffa892eba5713043fc0c6f --- /dev/null +++ b/resnet/13qckf6e/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73701134bb1a9dc90af2741966fe272ccef0dabe4c25bef04219ed3ab2e2328d +size 49105345 diff --git a/resnet/13qckf6e/cp_1301913600/cfg.json b/resnet/13qckf6e/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..35732ce94cf4ad2f295edfa206034832d27530cd --- /dev/null +++ b/resnet/13qckf6e/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1301913600/model b/resnet/13qckf6e/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..3bb83c22639460a37c2e9a6021f60ec17d089a73 --- /dev/null +++ b/resnet/13qckf6e/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:622536a9d8be9ae8b17bf1b0a0393ad9ec0e96486b7157c0adddff8e3eee6810 +size 49105345 diff --git a/resnet/13qckf6e/cp_1402060800/cfg.json b/resnet/13qckf6e/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..097a14aa29aa1d12ac59f0fbed2abdbf14d380fb --- /dev/null +++ b/resnet/13qckf6e/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1402060800/model b/resnet/13qckf6e/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..8247d2ab36dd4e931abcf1f2d8e166724deb1cba --- /dev/null +++ b/resnet/13qckf6e/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91884eeb748707c2c0c22780b44da86e604628eabf5ac5f6695a917828d5fc59 +size 49105345 diff --git a/resnet/13qckf6e/cp_1502208000/cfg.json b/resnet/13qckf6e/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ae1c65316a3850e76d15baa3d99c71d201ad3281 --- /dev/null +++ b/resnet/13qckf6e/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1502208000/model b/resnet/13qckf6e/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..a6abbdb7e479cedb885dfd556d47a548c6ff1862 --- /dev/null +++ b/resnet/13qckf6e/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16952da11c3c0ddd5031602937b51fb7a26c9c59d316397a5fa08682ea146968 +size 49105345 diff --git a/resnet/13qckf6e/cp_1602355200/cfg.json b/resnet/13qckf6e/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..73a6684f4f009b91bdc57d3d68a1a5ff2e85ef4a --- /dev/null +++ b/resnet/13qckf6e/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1602355200/model b/resnet/13qckf6e/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..43fbc8883e5520ab2c1135a597323512aa98159a --- /dev/null +++ b/resnet/13qckf6e/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5eba73c4b644a26112ed01d4a8d59f14a2eacb72eadd28f04bb118f1c81745c +size 49105345 diff --git a/resnet/13qckf6e/cp_1702502400/cfg.json b/resnet/13qckf6e/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..478c804cef53795045f3f97918ec7715335bbf91 --- /dev/null +++ b/resnet/13qckf6e/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1702502400/model b/resnet/13qckf6e/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..6b3adef40dbce2cdf361f45990a053c3dd412b69 --- /dev/null +++ b/resnet/13qckf6e/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08bc5c867b5c1479e269b6c2947bb2f76c33e383f9f4ede24a9845e10a7d3826 +size 49105345 diff --git a/resnet/13qckf6e/cp_1802649600/cfg.json b/resnet/13qckf6e/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..06039ff65fce682d2cf2e8a555e392735ef5444c --- /dev/null +++ b/resnet/13qckf6e/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1802649600/model b/resnet/13qckf6e/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..29118b9ddff13ba484475b408e2918aab8492608 --- /dev/null +++ b/resnet/13qckf6e/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed0f3c7f8d8a7c58e911b9bd8088e4c106a5a363e8e9ee23b24ff7837c3f71d +size 49105345 diff --git a/resnet/13qckf6e/cp_1902796800/cfg.json b/resnet/13qckf6e/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ac01425aaa1e0eaf5e27496fbf4ec179d9509fd5 --- /dev/null +++ b/resnet/13qckf6e/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_1902796800/model b/resnet/13qckf6e/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..2dd862a0b7515584a5e40defaa24dfd6ec5160b5 --- /dev/null +++ b/resnet/13qckf6e/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c158985a8b841219e9150108d59c4f8b61210e6b350affc76a04e8c22cf1efd +size 49105345 diff --git a/resnet/13qckf6e/cp_2002944000/cfg.json b/resnet/13qckf6e/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6025772712b047593516603da2a6d5b5203f3dce --- /dev/null +++ b/resnet/13qckf6e/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 317026872}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 996363843, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/resnet/13qckf6e/cp_2002944000/model b/resnet/13qckf6e/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..ecf0bd64612f65f8657e8be6ccbe75db6702917c --- /dev/null +++ b/resnet/13qckf6e/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21e192333277a5c2009cb703ce9ad9da60cf876533538ee5002fd3f8378069c9 +size 49105345 diff --git a/resnet/28n07cac/cp_0000998400/cfg.json b/resnet/28n07cac/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9ee9b6e14be515b78caf39433969f1baef28d618 --- /dev/null +++ b/resnet/28n07cac/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0000998400/model b/resnet/28n07cac/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..2cd91a6bd77433639dc6c617aac0b7136e3ff2a4 --- /dev/null +++ b/resnet/28n07cac/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d49316f984d73930ea49392f009c1aa977fd65bb355b5be5ba3e2115f74475 +size 49105345 diff --git a/resnet/28n07cac/cp_0002001920/cfg.json b/resnet/28n07cac/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..50fa45b8339ac82a62308fefe7a400d7e88f13c4 --- /dev/null +++ b/resnet/28n07cac/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0002001920/model b/resnet/28n07cac/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..5dd1f9613e85aca39e1c7acee70b92cac43afb3f --- /dev/null +++ b/resnet/28n07cac/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c49da8eedd2e2ae75e0a3d11f46cf28df28b61064c246b324e032a8704fae34 +size 49105345 diff --git a/resnet/28n07cac/cp_0003000320/cfg.json b/resnet/28n07cac/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c11816963c77f057516f3defafc450e35e1dae19 --- /dev/null +++ b/resnet/28n07cac/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0003000320/model b/resnet/28n07cac/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..d9e662fc943ebb190f4ed0e6072022482cb39657 --- /dev/null +++ b/resnet/28n07cac/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31168a10084f8c09eb9aeeae16e9dcb8b48ccacadd207ca27157048369710e2a +size 49105345 diff --git a/resnet/28n07cac/cp_0004003840/cfg.json b/resnet/28n07cac/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3cdbf9ac10a7b48e7c3c2488e502a026eae4ebd9 --- /dev/null +++ b/resnet/28n07cac/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0004003840/model b/resnet/28n07cac/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..8c474c43fed4f99b3cfccafdc12e540a632cdcfa --- /dev/null +++ b/resnet/28n07cac/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c360fd0ebc834dffcf1f083a7a03a1ab6b876ce261bc36c4910abc1f7992d24c +size 49105345 diff --git a/resnet/28n07cac/cp_0005007360/cfg.json b/resnet/28n07cac/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..724b45d6d06a74281a00cbaa7637cffaacec10f4 --- /dev/null +++ b/resnet/28n07cac/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0005007360/model b/resnet/28n07cac/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..b70ec26a9fd8c94b0e76e888307d6d825da3d2b0 --- /dev/null +++ b/resnet/28n07cac/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f7b63ec64faccd721ce4e5e81968c49fd34ef6ef9dfa7f975a3eb798a370ef9 +size 49105345 diff --git a/resnet/28n07cac/cp_0006005760/cfg.json b/resnet/28n07cac/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5f89385a6ed84156af484fec2f7044841b9bfc0d --- /dev/null +++ b/resnet/28n07cac/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0006005760/model b/resnet/28n07cac/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..f720b4bec5c582bd0ab0e62a986617a36fd2d268 --- /dev/null +++ b/resnet/28n07cac/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833b3589972c1e86d3ec5c8936457404a6642c7c5dd262e2289fadad0955a6b8 +size 49105345 diff --git a/resnet/28n07cac/cp_0007009280/cfg.json b/resnet/28n07cac/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0f5bb74ccb57c0f8b1d93f81d38fbb3ca2864f71 --- /dev/null +++ b/resnet/28n07cac/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0007009280/model b/resnet/28n07cac/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..0bf43ed34ab579a519184cf928befe8189027e69 --- /dev/null +++ b/resnet/28n07cac/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f559f7121eb4df90d7f5e300bbe4380bd5189a9ceda65e978026d72aa68053 +size 49105345 diff --git a/resnet/28n07cac/cp_0008007680/cfg.json b/resnet/28n07cac/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6f9c157716c3c295357c07d69771f02ee067c7b0 --- /dev/null +++ b/resnet/28n07cac/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0008007680/model b/resnet/28n07cac/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..327a6a3d3edd6fe814ff6766d50a22e44e0c4315 --- /dev/null +++ b/resnet/28n07cac/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:accb32e5ff6c17daed48793d9951e1ee357edef74df62ca78a0dac10419ec0d7 +size 49105345 diff --git a/resnet/28n07cac/cp_0009011200/cfg.json b/resnet/28n07cac/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..df7650e164ef7896913b2c05d00aadbc4fa4468c --- /dev/null +++ b/resnet/28n07cac/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0009011200/model b/resnet/28n07cac/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..491c45b337a84d208153a8d14c36fc7a696405d4 --- /dev/null +++ b/resnet/28n07cac/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a1f443022eaad6f3a0b2323d15601ef1caec2c2824215b68872bfea109c876 +size 49105345 diff --git a/resnet/28n07cac/cp_0010014720/cfg.json b/resnet/28n07cac/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e0d551336da52848526b30018782f3d4212ae72f --- /dev/null +++ b/resnet/28n07cac/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0010014720/model b/resnet/28n07cac/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..298c8322e18677201e8a53f5dac8c57f746c1e6a --- /dev/null +++ b/resnet/28n07cac/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2457dc591422c045c4180851cc9a1f108052348d0ba39b72886bffb77819951 +size 49105345 diff --git a/resnet/28n07cac/cp_0011013120/cfg.json b/resnet/28n07cac/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2b33eb1e12185392e40a5577190241f403126cb6 --- /dev/null +++ b/resnet/28n07cac/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0011013120/model b/resnet/28n07cac/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..a66ddbf025adac7108ec8b04162b8b3acba3d1fa --- /dev/null +++ b/resnet/28n07cac/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21723c1a7791a47671341b580d700a5514c90a1a0779c822d0db44030a580e8e +size 49105345 diff --git a/resnet/28n07cac/cp_0012016640/cfg.json b/resnet/28n07cac/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..cd41c445278f57ff91ec866fed19f1e0408fae84 --- /dev/null +++ b/resnet/28n07cac/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0012016640/model b/resnet/28n07cac/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..8688fbe4f9706ac108174ed32ce2eca7e9be5a9a --- /dev/null +++ b/resnet/28n07cac/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09e338453e54e87684618744a8796efcedfc871fd65c2b97d4f63515eee67c8 +size 49105345 diff --git a/resnet/28n07cac/cp_0013015040/cfg.json b/resnet/28n07cac/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e863f6abed0bae91aed03cd052bbb9d2725a50af --- /dev/null +++ b/resnet/28n07cac/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0013015040/model b/resnet/28n07cac/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..c64731fb8d4f86e3babf3f26a755978d0f554cc2 --- /dev/null +++ b/resnet/28n07cac/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9466c960b8109a8fc7a203c1fc29037bda7c35c69d714a4d8d5370505f7f0bf +size 49105345 diff --git a/resnet/28n07cac/cp_0014018560/cfg.json b/resnet/28n07cac/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4fb10e7d1d9103a16b58bc54a4820fec70f1a35b --- /dev/null +++ b/resnet/28n07cac/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0014018560/model b/resnet/28n07cac/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..ee10a2c1b7a10612915947e93f8eccbf968268ce --- /dev/null +++ b/resnet/28n07cac/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18790892c330dbca5b0dc30427eda17e3224f51da8981c7e1381b4b5a70dd029 +size 49105345 diff --git a/resnet/28n07cac/cp_0015022080/cfg.json b/resnet/28n07cac/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..55a41c0f056c330fe094d82bffcb7bf45e0367bf --- /dev/null +++ b/resnet/28n07cac/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0015022080/model b/resnet/28n07cac/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..6d54be58bc05543add981838722e743c58a1fbc9 --- /dev/null +++ b/resnet/28n07cac/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b7418d3dba79c3f7bf24d58be35021a45de67d1521ba79df12dc993cd2169a +size 49105345 diff --git a/resnet/28n07cac/cp_0016020480/cfg.json b/resnet/28n07cac/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dc1a574ab9f80c1b679274c1180f3c604f2fb62e --- /dev/null +++ b/resnet/28n07cac/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0016020480/model b/resnet/28n07cac/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..621c762fa4e0e333cdf7607b250637b89cccaaa0 --- /dev/null +++ b/resnet/28n07cac/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc5ba9feba3908c4b91223b026278f3d88ba7171c9b4cea844c7213c620ac3e +size 49105345 diff --git a/resnet/28n07cac/cp_0017024000/cfg.json b/resnet/28n07cac/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..118909e593ae5e362a7f45433d97914c998c3d7e --- /dev/null +++ b/resnet/28n07cac/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0017024000/model b/resnet/28n07cac/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..463a6899117c6aeb7ec662f1aae87d9102ac5676 --- /dev/null +++ b/resnet/28n07cac/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30ec1fd3e865fe6563edae197b0b82eab8d64edcef953f213bee105bf9c983f9 +size 49105345 diff --git a/resnet/28n07cac/cp_0018022400/cfg.json b/resnet/28n07cac/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1eaff05b64c40d2bba66c7a7f97f78d9a8613579 --- /dev/null +++ b/resnet/28n07cac/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0018022400/model b/resnet/28n07cac/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..cf09fe17479eb565ee2f23c8a4434ae07395afde --- /dev/null +++ b/resnet/28n07cac/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:330d486ca296546f772c7f7396b6e3c9a892c8bfb8759aa5fef3b279ca4a93ba +size 49105345 diff --git a/resnet/28n07cac/cp_0019025920/cfg.json b/resnet/28n07cac/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..23bfa94ec36550ce41556e08176ae52d811c849a --- /dev/null +++ b/resnet/28n07cac/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0019025920/model b/resnet/28n07cac/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..6d5af35dd5e2e7b958009ebd107feea681dccf97 --- /dev/null +++ b/resnet/28n07cac/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db4b9dc8890a157a20111aa1a14084f5b15d74a70295b718c2dbe1854d80bd59 +size 49105345 diff --git a/resnet/28n07cac/cp_0020029440/cfg.json b/resnet/28n07cac/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1a73a552431017a9b4559013fd20e7dadc486f13 --- /dev/null +++ b/resnet/28n07cac/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0020029440/model b/resnet/28n07cac/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..fa51faf2d2b08e4f98064e3b8d837795248d10c4 --- /dev/null +++ b/resnet/28n07cac/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a4efe6eee2f089926d0fec42dda71c712041823e3078799c44cb126cb26cec +size 49105345 diff --git a/resnet/28n07cac/cp_0030044160/cfg.json b/resnet/28n07cac/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7bf6cf24bad10442dce1effaf991c6cf7ab8f2c3 --- /dev/null +++ b/resnet/28n07cac/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0030044160/model b/resnet/28n07cac/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..1acf97ee03c85310523ea8a7893812fa6467834d --- /dev/null +++ b/resnet/28n07cac/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edcc0cc8fb12afb250615a11a3b9f5a975551c6ea6cca54b12cb3c98e10b74fb +size 49105345 diff --git a/resnet/28n07cac/cp_0040058880/cfg.json b/resnet/28n07cac/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3d40a4ac87695ea4660498c1683a08948c167b27 --- /dev/null +++ b/resnet/28n07cac/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0040058880/model b/resnet/28n07cac/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..f2e36bde01d8c24a70854806698e72c310f5b5bc --- /dev/null +++ b/resnet/28n07cac/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf4e3d819bb0c99eb990b5204ccfff4e8e0967f0c54fb8eaecebb8b326b760d +size 49105345 diff --git a/resnet/28n07cac/cp_0050073600/cfg.json b/resnet/28n07cac/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..593b172dcd2d8732b0742c845d2cfcf09f4a28d3 --- /dev/null +++ b/resnet/28n07cac/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0050073600/model b/resnet/28n07cac/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..876598b4549422333460bd5cda2284c1d40514e1 --- /dev/null +++ b/resnet/28n07cac/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c58516d73787e7903525afe232a510e762132226d0c67d6704a2cfe5ee8ffbd5 +size 49105345 diff --git a/resnet/28n07cac/cp_0060088320/cfg.json b/resnet/28n07cac/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3b31044b6fb1a4636b3949e1c2a640b9768ee5c5 --- /dev/null +++ b/resnet/28n07cac/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0060088320/model b/resnet/28n07cac/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..041b2d03889617fade2a5e5fd33252d7c6e34a04 --- /dev/null +++ b/resnet/28n07cac/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a8ad6083ad0b30115c427f9ee9b082dbc2e9bcd9abb54bf52b56b5692fcaaf +size 49105345 diff --git a/resnet/28n07cac/cp_0070103040/cfg.json b/resnet/28n07cac/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8cb313ebd2acf395d49c8e94f68e5f0d074ceb99 --- /dev/null +++ b/resnet/28n07cac/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0070103040/model b/resnet/28n07cac/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..8eb319c9f7cb659c568637ef84cfb0141684570d --- /dev/null +++ b/resnet/28n07cac/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ffb6df78ca3b29068819a7ce2824b89a437fc5a6ed07a7579e4f2e2f491122e +size 49105345 diff --git a/resnet/28n07cac/cp_0080117760/cfg.json b/resnet/28n07cac/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d314e599904f7fb04fa6bd63f7cc4e03232017e7 --- /dev/null +++ b/resnet/28n07cac/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0080117760/model b/resnet/28n07cac/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..0d213094e6230b8c31d950347890f30a504891a5 --- /dev/null +++ b/resnet/28n07cac/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a352dd66acf3a59156c8ce6e3fdf1953ee59f6e4dcf2f9e91a9f8a53d8abb7 +size 49105345 diff --git a/resnet/28n07cac/cp_0090132480/cfg.json b/resnet/28n07cac/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9f7e46c3edd80f5d772c896e25b1260813cd3a88 --- /dev/null +++ b/resnet/28n07cac/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0090132480/model b/resnet/28n07cac/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..708f5078f9602f3fbcd230b39604a40290b26654 --- /dev/null +++ b/resnet/28n07cac/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6734dda77e95d396f93c2b2407cf2e31d8af96a7dbc2702d6ee5482e859ebea +size 49105345 diff --git a/resnet/28n07cac/cp_0100147200/cfg.json b/resnet/28n07cac/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b228418e94e8c9c82383cc70230d0de532453fa8 --- /dev/null +++ b/resnet/28n07cac/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0100147200/model b/resnet/28n07cac/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..ff205e00c87ac09d17a1bbf4f784e6163222f484 --- /dev/null +++ b/resnet/28n07cac/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf4bf2fa07320418725e98b8daccf66a139f426ea9c1879c3170518f23e42c7 +size 49105345 diff --git a/resnet/28n07cac/cp_0110161920/cfg.json b/resnet/28n07cac/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..38f7588b748d3b8e3809fcf1ce1382014aa41e92 --- /dev/null +++ b/resnet/28n07cac/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0110161920/model b/resnet/28n07cac/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..3503d3a173daef89e33a1d59491363e5a0b9ae91 --- /dev/null +++ b/resnet/28n07cac/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f44cbbe197c0b1c08c4d32ae8512f2b1a31150555bab9caca019581ebddade83 +size 49105345 diff --git a/resnet/28n07cac/cp_0120176640/cfg.json b/resnet/28n07cac/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2b5d13ee1f998eb7bd3eedc8b7dae16d3586130f --- /dev/null +++ b/resnet/28n07cac/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0120176640/model b/resnet/28n07cac/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..3a235fa7f0c5acf5f74744751c8da37c41c89b1d --- /dev/null +++ b/resnet/28n07cac/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f54441b17d467f6e259d703a8ed2374833585a7f1d4e37acb2acac5e2cfefff +size 49105345 diff --git a/resnet/28n07cac/cp_0130191360/cfg.json b/resnet/28n07cac/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9347718a68a69199e974cf175e6e211e4f86d723 --- /dev/null +++ b/resnet/28n07cac/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0130191360/model b/resnet/28n07cac/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..0092c119481a11226109157d322ec1075620578a --- /dev/null +++ b/resnet/28n07cac/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:193e394f5b2b8b17b5a1330b5aeac0b4b347d4df58b2b021e8929f347f6b24c7 +size 49105345 diff --git a/resnet/28n07cac/cp_0140206080/cfg.json b/resnet/28n07cac/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..747b5d87879de37e71fb67e3859e958703690a3c --- /dev/null +++ b/resnet/28n07cac/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0140206080/model b/resnet/28n07cac/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..448726817ced85655ad38b4019c1348e71b40bd0 --- /dev/null +++ b/resnet/28n07cac/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f16c42f1c45db8bee839be04340b7f887c5cf01c486bf1c54df074a72b2480f +size 49105345 diff --git a/resnet/28n07cac/cp_0150220800/cfg.json b/resnet/28n07cac/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7d75a2429d03f7c7a342cdace246cece18029795 --- /dev/null +++ b/resnet/28n07cac/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0150220800/model b/resnet/28n07cac/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..cc509112cb8c582ef001a38ed72776f2410f1a73 --- /dev/null +++ b/resnet/28n07cac/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9257c8919784b15a545cc72ba503b155ebc58d05dc7ed09c3e9f3d1a82ec0321 +size 49105345 diff --git a/resnet/28n07cac/cp_0160235520/cfg.json b/resnet/28n07cac/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..189994ae56211aa0f103600c7ef6a3a1787080cc --- /dev/null +++ b/resnet/28n07cac/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0160235520/model b/resnet/28n07cac/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..ef180bb3e3ce74d5be3f4f7d2afb28be4063a4c4 --- /dev/null +++ b/resnet/28n07cac/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8aa71f22606a551f62a41f7bc6f70fb0d6af9406828469ba467896f5db814e +size 49105345 diff --git a/resnet/28n07cac/cp_0170250240/cfg.json b/resnet/28n07cac/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8483f86c834ebc1bdb896d015ca4d1277709cad9 --- /dev/null +++ b/resnet/28n07cac/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0170250240/model b/resnet/28n07cac/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..5fe53a9dee53d6719e8d4b5e13f3c569cf3c2b53 --- /dev/null +++ b/resnet/28n07cac/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411350327c06b5c31ce0fbe4cba2c0c5495e75eab5eb81a7962c86d292a8febd +size 49105345 diff --git a/resnet/28n07cac/cp_0180264960/cfg.json b/resnet/28n07cac/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9c1916b75ef361fb391b3450cf49817a1ee718e5 --- /dev/null +++ b/resnet/28n07cac/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0180264960/model b/resnet/28n07cac/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..2adba8bf9739aeade16d8c9fc52f82d852134e14 --- /dev/null +++ b/resnet/28n07cac/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4efe40bf89494ead41878dab898d0299fda7132948d35450072889f51e590925 +size 49105345 diff --git a/resnet/28n07cac/cp_0190279680/cfg.json b/resnet/28n07cac/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e275b246628c0ddbd0e2212a6c005bee9811b350 --- /dev/null +++ b/resnet/28n07cac/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0190279680/model b/resnet/28n07cac/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..611826ee62c8bdab33350b3746849ae49086d47a --- /dev/null +++ b/resnet/28n07cac/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d94a6dae720f4abdad510244dfe6212b38a4adf7bc7f88814d1c788350dfb12 +size 49105345 diff --git a/resnet/28n07cac/cp_0200294400/cfg.json b/resnet/28n07cac/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e4fdccdc265a7c0dbb2de4635e093f8b1490d19e --- /dev/null +++ b/resnet/28n07cac/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0200294400/model b/resnet/28n07cac/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..7b60843899b73995842a1b509091d687dfc8a5b0 --- /dev/null +++ b/resnet/28n07cac/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5360092c6e835a41fccf21672f1bf91aff8e6999c394014a4635db7b4ef8479a +size 49105345 diff --git a/resnet/28n07cac/cp_0300441600/cfg.json b/resnet/28n07cac/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c92ab78df88f478190dc360fefd0d8b938f4d16d --- /dev/null +++ b/resnet/28n07cac/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0300441600/model b/resnet/28n07cac/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..9eab3880df780913360d3c45383e1d3c5ccadba3 --- /dev/null +++ b/resnet/28n07cac/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b65635d26140ae6e7b57823aec26446f15955e6d5a52a11fa750b9e4f99f6d2 +size 49105345 diff --git a/resnet/28n07cac/cp_0400588800/cfg.json b/resnet/28n07cac/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..24c96978d0adf8dfc14d72c878b460271cecaca0 --- /dev/null +++ b/resnet/28n07cac/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0400588800/model b/resnet/28n07cac/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..e1200d141a5108ecfddf7cf5103b6e5ef56830d9 --- /dev/null +++ b/resnet/28n07cac/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b4cb02cf2a019d1cfc067f4dadafb89ec326236a563dddd54d715277cfe62b +size 49105345 diff --git a/resnet/28n07cac/cp_0500736000/cfg.json b/resnet/28n07cac/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..98fd12f762eec582a2e6b16f0cb9c5c6685c1ffa --- /dev/null +++ b/resnet/28n07cac/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0500736000/model b/resnet/28n07cac/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..55817af93738796dc567d21eac480594bef57f86 --- /dev/null +++ b/resnet/28n07cac/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429376b20f88d94a535b824a61af077b3f247e49fe0b448a730d1f4e1d9db48b +size 49105345 diff --git a/resnet/28n07cac/cp_0600883200/cfg.json b/resnet/28n07cac/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6a9d4d1840c437c5ce670f29df157147fe90a999 --- /dev/null +++ b/resnet/28n07cac/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0600883200/model b/resnet/28n07cac/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..425d26a5430a2ef165fe689224c2cb057db3d2f2 --- /dev/null +++ b/resnet/28n07cac/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9601edd45dbb2c9b634e346ed9bc57a9c18d5690c3695a0262d0014655a87839 +size 49105345 diff --git a/resnet/28n07cac/cp_0701030400/cfg.json b/resnet/28n07cac/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d966732d7213b69d9a735469f2e11b9a63de838a --- /dev/null +++ b/resnet/28n07cac/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0701030400/model b/resnet/28n07cac/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..244e6c2faf2ce0842b65e57890b596bb7e8d914a --- /dev/null +++ b/resnet/28n07cac/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6876634843009f071b6bf585b80c4e9e601305cee0e03f4dc7f3bbd91b7ea529 +size 49105345 diff --git a/resnet/28n07cac/cp_0801177600/cfg.json b/resnet/28n07cac/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3e7c11794b339c128cf84c5166f62acbcc77ae86 --- /dev/null +++ b/resnet/28n07cac/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0801177600/model b/resnet/28n07cac/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..8295eff03d4c194bf70812b45be1e66402810355 --- /dev/null +++ b/resnet/28n07cac/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d56232fbf0a84a119df3c3a1bd760dc4e0a050e72d961db524a81442fe84c68 +size 49105345 diff --git a/resnet/28n07cac/cp_0901324800/cfg.json b/resnet/28n07cac/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dfedd6d1ccc38a51440a370941b490b925bb8a19 --- /dev/null +++ b/resnet/28n07cac/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/resnet/28n07cac/cp_0901324800/model b/resnet/28n07cac/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..3aea81f93534631a565d58cd7642ad4a2f72946e --- /dev/null +++ b/resnet/28n07cac/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30375f78b592b7023393c81b14052e1d2bc1ef53181e5df241421e7aeb8b8697 +size 49105345 diff --git a/resnet/28n07cac/cp_1001472000/cfg.json b/resnet/28n07cac/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d432fc8ca88b5d466be264d8557828d23177bf33 --- /dev/null +++ b/resnet/28n07cac/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1001472000/model b/resnet/28n07cac/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..b62a1fb42f4863d6669e4e24e637d82f314652c1 --- /dev/null +++ b/resnet/28n07cac/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f55e09aa6a4ce0a36a5e31023820f24ed3bfe23f49bf81065f97c435da8472 +size 49105345 diff --git a/resnet/28n07cac/cp_1101619200/cfg.json b/resnet/28n07cac/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0e7de2f0d587dd2efe8149766894130bb4b0d1d2 --- /dev/null +++ b/resnet/28n07cac/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1101619200/model b/resnet/28n07cac/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..a927ae78b3c29abac4de4f2613b84f7222cb375b --- /dev/null +++ b/resnet/28n07cac/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b4953fa80baa07d6d6e7b6d820ead9cc7659c965f0550c8f880dc5bd07f6cc +size 49105345 diff --git a/resnet/28n07cac/cp_1201766400/cfg.json b/resnet/28n07cac/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dcc4bc53eba6322213fb9c208643e9d03fe593f8 --- /dev/null +++ b/resnet/28n07cac/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1201766400/model b/resnet/28n07cac/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..70ecd72eb8b3f5fc04df6464138c1bbc687d4854 --- /dev/null +++ b/resnet/28n07cac/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f0cdce5eac0d310ad39e3827bb725575358c7b2397e39f6dc4b8e1d6f21ae4 +size 49105345 diff --git a/resnet/28n07cac/cp_1301913600/cfg.json b/resnet/28n07cac/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ebd123565142a9f78d4c750390bacb0fccbeaada --- /dev/null +++ b/resnet/28n07cac/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1301913600/model b/resnet/28n07cac/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..3aea8b5e3738e2c5e2d301f82159156fa6f1ba3b --- /dev/null +++ b/resnet/28n07cac/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:623698c91ce48ab6819129220d01cb32754b4cde1c2165a0356b7fc02d444c45 +size 49105345 diff --git a/resnet/28n07cac/cp_1402060800/cfg.json b/resnet/28n07cac/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7ffbf7357835db1d2f39baac155ac2e5da6d2a8b --- /dev/null +++ b/resnet/28n07cac/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1402060800/model b/resnet/28n07cac/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..0920837d3e308dc5c5a278a1f9087e91785cdd09 --- /dev/null +++ b/resnet/28n07cac/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62ab250c4c1b329ab25b1890b37757c78b58b991fcf6e4d4c8b5a4e38ca1250f +size 49105345 diff --git a/resnet/28n07cac/cp_1502208000/cfg.json b/resnet/28n07cac/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..91384c621951fbf86530e4abaa46c3979b5b859d --- /dev/null +++ b/resnet/28n07cac/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1502208000/model b/resnet/28n07cac/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..18c3e9466f525ab19561fe1eb161778a4373beb2 --- /dev/null +++ b/resnet/28n07cac/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445c5413329bcc5452d03a5917ac3afb795cf3c250204c64827338e7341a393e +size 49105345 diff --git a/resnet/28n07cac/cp_1602355200/cfg.json b/resnet/28n07cac/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c8af0eb6f69f9d903149237fbf9e53917cdc0a1d --- /dev/null +++ b/resnet/28n07cac/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1602355200/model b/resnet/28n07cac/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..45791105d0537f021c73150d8140b845675fbc51 --- /dev/null +++ b/resnet/28n07cac/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9602dcdad05de5575ad8badbd7d4284852a3fa6b489acff7323ac1e53508464 +size 49105345 diff --git a/resnet/28n07cac/cp_1702502400/cfg.json b/resnet/28n07cac/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5a7d7a4185053629e41356ca697aaa2c4bf4ba23 --- /dev/null +++ b/resnet/28n07cac/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1702502400/model b/resnet/28n07cac/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..48dd90e9e6f7a6a060dd28302a8667ce35b8590e --- /dev/null +++ b/resnet/28n07cac/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327976ba791cf8d363dced883590a58261ae1e3add27050467a3f20d6c1d25a0 +size 49105345 diff --git a/resnet/28n07cac/cp_1802649600/cfg.json b/resnet/28n07cac/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..aec741226333682ab810bcd9ed5a3ba57f5dc584 --- /dev/null +++ b/resnet/28n07cac/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1802649600/model b/resnet/28n07cac/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..8b26717902a2106ac813b8c01dc3cac8d62faf0a --- /dev/null +++ b/resnet/28n07cac/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe3bcbca0a9895a5a3f745882d1e7dc4736fd6dc8fa5e4bf4c04852602e7fa77 +size 49105345 diff --git a/resnet/28n07cac/cp_1902796800/cfg.json b/resnet/28n07cac/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e6beeeb0537792c51f3e7cb7a6f25b926d4a7f78 --- /dev/null +++ b/resnet/28n07cac/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/resnet/28n07cac/cp_1902796800/model b/resnet/28n07cac/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..29e8167bd812a2b21574930f016f90f146afd458 --- /dev/null +++ b/resnet/28n07cac/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d02c0f288da6f97a9a13e46b047050a45b39f4af50d27376b3ad9f1326fd10 +size 49105345 diff --git a/resnet/28n07cac/cp_2002944000/cfg.json b/resnet/28n07cac/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c2044e924089f26f5ed509c835d34022bc4ea9c3 --- /dev/null +++ b/resnet/28n07cac/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1413380980}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 823413895, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/resnet/28n07cac/cp_2002944000/model b/resnet/28n07cac/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..e5b91a2f5cebd72c7f341f0a023dc114253d5645 --- /dev/null +++ b/resnet/28n07cac/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c653ba6f6ed053a60bd24c137ad289f87e8c93520b7469051d2b9c81063658 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0000998400/cfg.json b/resnet/8ul1b23e/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8075e0932216a4851907ab64e8c4c0d0d5416d67 --- /dev/null +++ b/resnet/8ul1b23e/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0000998400/model b/resnet/8ul1b23e/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..f84f82af84476b7e48cf1e2c9a670bf28db73efa --- /dev/null +++ b/resnet/8ul1b23e/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a1c3d58353cd86a07d8b3a5be36cef8dc4fe0028b35c79f158f2fad46e9302c +size 49105345 diff --git a/resnet/8ul1b23e/cp_0002001920/cfg.json b/resnet/8ul1b23e/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c0af9141808921169b252d604d6838ade411ce50 --- /dev/null +++ b/resnet/8ul1b23e/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0002001920/model b/resnet/8ul1b23e/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..d5a1c34fb3306cd32625398e3bb54e974b88c7cf --- /dev/null +++ b/resnet/8ul1b23e/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a79fde880812bd68f72c413c86de87c6485cc609b5faefe4ed4c79888a5033f +size 49105345 diff --git a/resnet/8ul1b23e/cp_0003000320/cfg.json b/resnet/8ul1b23e/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0518a94adfe64b0b677439f53caf8e3888edb426 --- /dev/null +++ b/resnet/8ul1b23e/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0003000320/model b/resnet/8ul1b23e/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..f7f913f60947f7575dff8c37af44cd6c2264ea1b --- /dev/null +++ b/resnet/8ul1b23e/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26cad06e7320aca5d4f41e09f2afc8cedb9df1c5e300b953bc4b07fdeb865bd4 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0004003840/cfg.json b/resnet/8ul1b23e/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7a7752f9243ec467544061ba7236dcf21b440993 --- /dev/null +++ b/resnet/8ul1b23e/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0004003840/model b/resnet/8ul1b23e/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..b314005c4affe277544e03505f20509c51c9cc71 --- /dev/null +++ b/resnet/8ul1b23e/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7ded7370fc26516c0752eca398e7a27f6a9d14bc1783fa3ebb69eb89e88be82 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0005007360/cfg.json b/resnet/8ul1b23e/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f1065124530ed5477ddf0d15af2a2c2d6f50c7dd --- /dev/null +++ b/resnet/8ul1b23e/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0005007360/model b/resnet/8ul1b23e/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..5ed4e46f50e8881b9fe6f4a756699ccdf7772dc9 --- /dev/null +++ b/resnet/8ul1b23e/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f67f570db474eb9100d7fff35cb0b176021bdbd4b7c6295ffc4e47433416496 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0006005760/cfg.json b/resnet/8ul1b23e/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e188cedfd341e349e7355cf30bafa350dc54af40 --- /dev/null +++ b/resnet/8ul1b23e/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0006005760/model b/resnet/8ul1b23e/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..9e4ed090940e60be5ddce467d3ec523379e2976a --- /dev/null +++ b/resnet/8ul1b23e/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a66a1cb313f5eec500f1ad09addac576f1503b54889f1b3a61a0bb4ff6caee +size 49105345 diff --git a/resnet/8ul1b23e/cp_0007009280/cfg.json b/resnet/8ul1b23e/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a4594d8a185418b0fe858dba799b0f0ca533b2a3 --- /dev/null +++ b/resnet/8ul1b23e/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0007009280/model b/resnet/8ul1b23e/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..a61bbf9a1640111e9a1ba959a1fb6fc39e9021c6 --- /dev/null +++ b/resnet/8ul1b23e/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d0b67c649641544dd5d65b1e211f99542564eda2b680d0d79aaffe17da35de +size 49105345 diff --git a/resnet/8ul1b23e/cp_0008007680/cfg.json b/resnet/8ul1b23e/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8604877264cf87beeed21f25edc2b1086c6e32b8 --- /dev/null +++ b/resnet/8ul1b23e/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0008007680/model b/resnet/8ul1b23e/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..e4efbc0878b3f4dad82cf1f90477081f0db2eb6f --- /dev/null +++ b/resnet/8ul1b23e/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e18863955aa46624ce1e3345afe1d1353234d4a728ff96f7e8318e6db45b576 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0009011200/cfg.json b/resnet/8ul1b23e/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ab69df6afad08398ffb455338673216ad938cfa6 --- /dev/null +++ b/resnet/8ul1b23e/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0009011200/model b/resnet/8ul1b23e/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..cfd21ce842f5a4b1fb3f820b41d09948cb5180c5 --- /dev/null +++ b/resnet/8ul1b23e/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9b89dc4f3c11641dd30ad72bf780dd18544d17142ca3d173840be613dc54ec +size 49105345 diff --git a/resnet/8ul1b23e/cp_0010014720/cfg.json b/resnet/8ul1b23e/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ac5726c275a73b37951d39ee593bd1602dc3e492 --- /dev/null +++ b/resnet/8ul1b23e/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0010014720/model b/resnet/8ul1b23e/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..0e456b115307713e0ee8d8bce97bc60f2a0aa831 --- /dev/null +++ b/resnet/8ul1b23e/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6df130ab4d2d54459cd5d2d6bdd081e845d0936900488123b449380e1af31c5 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0011013120/cfg.json b/resnet/8ul1b23e/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..de1c5d591a0e275172cbcd55e155478e5738bf5e --- /dev/null +++ b/resnet/8ul1b23e/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0011013120/model b/resnet/8ul1b23e/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..175cddfcb6f7f1d8575e5d22929d5ce2c0ea5e58 --- /dev/null +++ b/resnet/8ul1b23e/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:189874cea9e89f2926ced316432f7fe5922e1174debca82bad91e8cbf38ed76e +size 49105345 diff --git a/resnet/8ul1b23e/cp_0012016640/cfg.json b/resnet/8ul1b23e/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..41f566902f65a7d74721436aae0f2a534deb71d2 --- /dev/null +++ b/resnet/8ul1b23e/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0012016640/model b/resnet/8ul1b23e/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..631aadbf99e3d190b3a73b902429be90c48e9c0b --- /dev/null +++ b/resnet/8ul1b23e/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5734f69d2f451b428818dc27344c4c45a5b8367e0ccb3f5e5eba176b4efdf6ae +size 49105345 diff --git a/resnet/8ul1b23e/cp_0013015040/cfg.json b/resnet/8ul1b23e/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..84b542b8558078719aa8a26ac032dfac46e0cba2 --- /dev/null +++ b/resnet/8ul1b23e/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0013015040/model b/resnet/8ul1b23e/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..538184dad72ce0e162d94a090b38f41ba745f3f2 --- /dev/null +++ b/resnet/8ul1b23e/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b4f1957abc8741119d46ce78bc0ebedbeabbb256f092bca5bb93cd140ff77d +size 49105345 diff --git a/resnet/8ul1b23e/cp_0014018560/cfg.json b/resnet/8ul1b23e/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..878ded9c7a073c8be2da41e434449612fb288ee6 --- /dev/null +++ b/resnet/8ul1b23e/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0014018560/model b/resnet/8ul1b23e/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..0b77311a76659e147e0567e0e409fd0d372b85a1 --- /dev/null +++ b/resnet/8ul1b23e/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f1edb6fe1c6a8ed3eeec81abd8940df5dd874a8bd06cbbcaf4dbffcbf9cfa8b +size 49105345 diff --git a/resnet/8ul1b23e/cp_0015022080/cfg.json b/resnet/8ul1b23e/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..98a26bdba7bf73f3d95a0a51092e47a141f72508 --- /dev/null +++ b/resnet/8ul1b23e/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0015022080/model b/resnet/8ul1b23e/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..0cadd4243a8bd39921f078a00bcc75238eccce9e --- /dev/null +++ b/resnet/8ul1b23e/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b1f9f5543e32d2130c0ff58405bf224e1d44e851faaa81c004f1e9913b83df +size 49105345 diff --git a/resnet/8ul1b23e/cp_0016020480/cfg.json b/resnet/8ul1b23e/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1d661e09e531ed4aca71152dd8c9d5e1f5174f54 --- /dev/null +++ b/resnet/8ul1b23e/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0016020480/model b/resnet/8ul1b23e/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..c5226483261169d82e75b2c9fdb913334ccae2c5 --- /dev/null +++ b/resnet/8ul1b23e/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed3547b5ded696f98ef7323ce8fb465db1d0688799294ce523424653b0e99a9 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0017024000/cfg.json b/resnet/8ul1b23e/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b56b0243d1af1761fb84b1add7d6d3dd96b37509 --- /dev/null +++ b/resnet/8ul1b23e/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0017024000/model b/resnet/8ul1b23e/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..fc4f72298b5fbea947a2bba596e1ff99f3a17f2e --- /dev/null +++ b/resnet/8ul1b23e/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12fad2d3f378261a052c1089d20429558948cdf29fb2c2d0d50f1ff2d7035012 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0018022400/cfg.json b/resnet/8ul1b23e/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fb426a2400b2c6293349b4e40b4cf9a48cd27fc7 --- /dev/null +++ b/resnet/8ul1b23e/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0018022400/model b/resnet/8ul1b23e/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..8db888ab360d44f4380996a96ccec000056c0735 --- /dev/null +++ b/resnet/8ul1b23e/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff600ac8ce066c4db625366dfcba61d1b74f97e5d3e1e0936c1afc7810523088 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0019025920/cfg.json b/resnet/8ul1b23e/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3a50ef62fa1f1377fd0f5a1add446e22a4356314 --- /dev/null +++ b/resnet/8ul1b23e/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0019025920/model b/resnet/8ul1b23e/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..f6a6aefd733dea2b244a2356e487b6762773e4cd --- /dev/null +++ b/resnet/8ul1b23e/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be8728d34149b0957d2f262837687b9a90872dd39e630a49655a8a86e8fd85fd +size 49105345 diff --git a/resnet/8ul1b23e/cp_0020029440/cfg.json b/resnet/8ul1b23e/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1b74d397abab3708152f8539ba55128111e15c8e --- /dev/null +++ b/resnet/8ul1b23e/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0020029440/model b/resnet/8ul1b23e/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..a8772d62f49c707e7a73a6cdf310640cb5cd8189 --- /dev/null +++ b/resnet/8ul1b23e/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b9fa062f1765c1a718d2b98094beb03f04d5e29b06e496b0e02119a003f8453 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0030044160/cfg.json b/resnet/8ul1b23e/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1f49d43a1c77ba58c1219a8076bdbbf621074f40 --- /dev/null +++ b/resnet/8ul1b23e/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0030044160/model b/resnet/8ul1b23e/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..8a07eb34e9dee6483b2bd5fd530ac2a862685068 --- /dev/null +++ b/resnet/8ul1b23e/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:962cacbb2ec3b20013299382d92fbfef58ce353b4849038ee336f3c81f2101ea +size 49105345 diff --git a/resnet/8ul1b23e/cp_0040058880/cfg.json b/resnet/8ul1b23e/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..762c283b37108d745e984d3360e1a4982e5f5e75 --- /dev/null +++ b/resnet/8ul1b23e/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0040058880/model b/resnet/8ul1b23e/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..170e6d259d064527871d88f2b8c5789a89ba71bf --- /dev/null +++ b/resnet/8ul1b23e/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f639d03f72443ab4578c29320095d353d150c0bd51d229864f97bc13b4c5829 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0050073600/cfg.json b/resnet/8ul1b23e/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dca502b03136966a9e0bd0cd878805594b85ed70 --- /dev/null +++ b/resnet/8ul1b23e/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0050073600/model b/resnet/8ul1b23e/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..a13196422a49357708a1cbb07ed9248e8354faea --- /dev/null +++ b/resnet/8ul1b23e/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606922e2da738a7d79c7bd38d40ec1b827c80e74d9deffde2e2020d5ab1b368f +size 49105345 diff --git a/resnet/8ul1b23e/cp_0060088320/cfg.json b/resnet/8ul1b23e/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..194763ecdff98f2dc9e25fac85a1aa02e024fbe5 --- /dev/null +++ b/resnet/8ul1b23e/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0060088320/model b/resnet/8ul1b23e/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..93e7cad429fd51572c6be5313bafbf726317233b --- /dev/null +++ b/resnet/8ul1b23e/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323d1adb06fc579be84c483eb024c402559722e9415bcd425e7e8e4e90fdf394 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0070103040/cfg.json b/resnet/8ul1b23e/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..83665a3f40ac558e9fb90aad66478bc6a45be094 --- /dev/null +++ b/resnet/8ul1b23e/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0070103040/model b/resnet/8ul1b23e/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..e9fb2757dd690409bfffe659d6e804f430ec78cf --- /dev/null +++ b/resnet/8ul1b23e/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44eefce6c7d70706620f53e207720097cc518cd78023b43c6b287283aa024674 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0080117760/cfg.json b/resnet/8ul1b23e/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9919022b416cc7bbb3206e0e1a5da5a375fbbfa9 --- /dev/null +++ b/resnet/8ul1b23e/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0080117760/model b/resnet/8ul1b23e/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..3d26c5b2617a8455907a05f6b3bafae3762b9c03 --- /dev/null +++ b/resnet/8ul1b23e/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4881fdabae23132771d69dfdbcfe6a62d1f25c32b3bb4912c370444ca1e76e0 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0090132480/cfg.json b/resnet/8ul1b23e/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4352d9b001b616260eb8ef41616a26e2a18e599d --- /dev/null +++ b/resnet/8ul1b23e/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0090132480/model b/resnet/8ul1b23e/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..c62dca91660626ba6a4162bc9147bc804d7a4f5a --- /dev/null +++ b/resnet/8ul1b23e/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a3d2eda2cf6fb45d585f86b04c99a7cb271b0f15852ab86768ee9d4ceeff65 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0100147200/cfg.json b/resnet/8ul1b23e/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8b11af428c6594a3bcee4c1b382cd3a25d125bb9 --- /dev/null +++ b/resnet/8ul1b23e/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0100147200/model b/resnet/8ul1b23e/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..ca7237418d0a90e2de12b53c42d19e3d71192023 --- /dev/null +++ b/resnet/8ul1b23e/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2141ea3615538160790588470783fab37b612d98686cee2ebefcb783698cc83 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0110161920/cfg.json b/resnet/8ul1b23e/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d5b3b63e559e4e4674b566b78ceec6916b7f8aae --- /dev/null +++ b/resnet/8ul1b23e/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0110161920/model b/resnet/8ul1b23e/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..2570aac123543c60feb2a6be44fa73eba48089bc --- /dev/null +++ b/resnet/8ul1b23e/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b64d1d855d1382b56e8ea71b22bb52cdb76b1f9384c652b49b54a6984c13ebb +size 49105345 diff --git a/resnet/8ul1b23e/cp_0120176640/cfg.json b/resnet/8ul1b23e/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e292580d8c61613d75cd36ebb6f817204efafc9d --- /dev/null +++ b/resnet/8ul1b23e/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0120176640/model b/resnet/8ul1b23e/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..f7a9dde447065f49bb656895dc81a357058dcc96 --- /dev/null +++ b/resnet/8ul1b23e/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb42cd8866051af5f24a61db8d17549e51e8aff8eb136a45be045000a06b1088 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0130191360/cfg.json b/resnet/8ul1b23e/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f0b5dc9c42145ef9676e0f47fb15f7df209a6f49 --- /dev/null +++ b/resnet/8ul1b23e/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0130191360/model b/resnet/8ul1b23e/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..fb8a6859a53eaea3b830ac2482a207bb871fedaf --- /dev/null +++ b/resnet/8ul1b23e/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5723e0ac16e23f4f3163437173e0c439fd0dcf8e89025a82a835ba2ea4a84ccb +size 49105345 diff --git a/resnet/8ul1b23e/cp_0140206080/cfg.json b/resnet/8ul1b23e/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bae4559c7f906a90fda6c14acf12592cdfc7a1fd --- /dev/null +++ b/resnet/8ul1b23e/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0140206080/model b/resnet/8ul1b23e/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..a1e93cc71f20b5770514e7464b7223a35c49d69b --- /dev/null +++ b/resnet/8ul1b23e/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7febc3f47095777bc730bf8200d98111292539064964236e6484ec36f674639 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0150220800/cfg.json b/resnet/8ul1b23e/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bc8ed5a57180107cbe18f34283a89fe34a1f5a33 --- /dev/null +++ b/resnet/8ul1b23e/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0150220800/model b/resnet/8ul1b23e/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..b2e04cc6b0b98afda2077d69eb5fb6a5faa20b6e --- /dev/null +++ b/resnet/8ul1b23e/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e0af3c106dcd0e7aa68468a0a08ddc6902a5f45cbd725f6d943a7a468766b91 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0160235520/cfg.json b/resnet/8ul1b23e/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c4d88052b24107bd4c50bc4ea7cdb12d06a8d03a --- /dev/null +++ b/resnet/8ul1b23e/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0160235520/model b/resnet/8ul1b23e/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..a40c6cd86846e794da8139cc21364b70d9952984 --- /dev/null +++ b/resnet/8ul1b23e/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab02f80e0993aacd559764107c0680b16a3f5ac81a3a5a121e5b8931c9514b32 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0170250240/cfg.json b/resnet/8ul1b23e/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..349b0fbb28dc325c6aed83b59a42046388e52457 --- /dev/null +++ b/resnet/8ul1b23e/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0170250240/model b/resnet/8ul1b23e/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..e42df29b0ccb5ad208e823294057209743ba74d9 --- /dev/null +++ b/resnet/8ul1b23e/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b95cfa3de08a927a5e4bf64518a52d9509e9e715f1a72f729a7f89795bc8e753 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0180264960/cfg.json b/resnet/8ul1b23e/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a1ae7d0966853ba55b8420ef14aa60dd5f0e27dd --- /dev/null +++ b/resnet/8ul1b23e/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0180264960/model b/resnet/8ul1b23e/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..5a851a89b3b0c8180edd4b1a8eff9816da9a91fb --- /dev/null +++ b/resnet/8ul1b23e/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c01c0ddd9a6cb7633ff2bb92dec78f25377258fcd77faf5b725758c9e3239b3 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0190279680/cfg.json b/resnet/8ul1b23e/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4bea2f9ac86b32133acfe73b12d31b62fc781f75 --- /dev/null +++ b/resnet/8ul1b23e/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0190279680/model b/resnet/8ul1b23e/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..e91258fb3df55fffbe745bef5171a54778dee73a --- /dev/null +++ b/resnet/8ul1b23e/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6795a6b7df73eb6a69e716d78a119b52882e6b9c63d99fdf9bf3bc9090301a38 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0200294400/cfg.json b/resnet/8ul1b23e/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a79d129128e8e3772d333f58d7029a4193516fde --- /dev/null +++ b/resnet/8ul1b23e/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0200294400/model b/resnet/8ul1b23e/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..c99d7d760ce175062f726ceabb1113560ab752fc --- /dev/null +++ b/resnet/8ul1b23e/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75224ac2654370d6f4d8c8bb03be90603d1eae3e060e1fc20d00ed3882d43dd1 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0300441600/cfg.json b/resnet/8ul1b23e/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbedd3cb13ea760668a8897790ce0b95912f5ba --- /dev/null +++ b/resnet/8ul1b23e/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0300441600/model b/resnet/8ul1b23e/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..1efa455c5980c49833ad43e5f2d45f4d3ace8353 --- /dev/null +++ b/resnet/8ul1b23e/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:184e9815a035a6d9afd080f76d122f478b1d2c047149cf96327184e6624348e0 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0400588800/cfg.json b/resnet/8ul1b23e/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..96e22ca0a5925d6cd83fb8f22126fc7f3c76a8e3 --- /dev/null +++ b/resnet/8ul1b23e/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0400588800/model b/resnet/8ul1b23e/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..fcd9259180d7bceae7f9a52313dc57f1d797b81e --- /dev/null +++ b/resnet/8ul1b23e/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb64012616a4a0af2248b06fdca01ba217ff1af838e297d89083f0d30572608 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0500736000/cfg.json b/resnet/8ul1b23e/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ddeccf7cf1c393af2dacafe0253ad81c0813bf9e --- /dev/null +++ b/resnet/8ul1b23e/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0500736000/model b/resnet/8ul1b23e/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..302cbfd01bc522094cc5e1f15a0a78ab10d464ab --- /dev/null +++ b/resnet/8ul1b23e/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3bb4186788ea9c6386208c340d2c0b8b9c7c352a90dcc00dbc22a8bb9973ff +size 49105345 diff --git a/resnet/8ul1b23e/cp_0600883200/cfg.json b/resnet/8ul1b23e/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fb48003737f028dbb580e9e19c5a8dd46d1c2ae5 --- /dev/null +++ b/resnet/8ul1b23e/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0600883200/model b/resnet/8ul1b23e/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..9536f36e614629062a909584ff57d296986e0d49 --- /dev/null +++ b/resnet/8ul1b23e/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099b5f5ba933c8042e63de206c69ee727ca24699dcae83dcd844c31d663cddc8 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0701030400/cfg.json b/resnet/8ul1b23e/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..81cfb9c970dba7c320e74e74bbf5ecd123fd6a10 --- /dev/null +++ b/resnet/8ul1b23e/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0701030400/model b/resnet/8ul1b23e/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..cd3cd6412d475b41febfe429210a6c61a0376687 --- /dev/null +++ b/resnet/8ul1b23e/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f03966b0c49f5ef88fad672327f90f2be93c5255f0cb8a964df06e64fe79bc +size 49105345 diff --git a/resnet/8ul1b23e/cp_0801177600/cfg.json b/resnet/8ul1b23e/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fd33377703dc4560efdabdd55cb89104f7f8ae9b --- /dev/null +++ b/resnet/8ul1b23e/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0801177600/model b/resnet/8ul1b23e/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..dab8345d5530a6713492ce5a1eeb5690f9b11ff1 --- /dev/null +++ b/resnet/8ul1b23e/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c0f697d8d74fe4151d2b1f6f291b71fc3bb2c742e3295f1209af2e1e2603679 +size 49105345 diff --git a/resnet/8ul1b23e/cp_0901324800/cfg.json b/resnet/8ul1b23e/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..eb927b5c3c2b171d56442cfa088b9021068f535c --- /dev/null +++ b/resnet/8ul1b23e/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_0901324800/model b/resnet/8ul1b23e/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..9faec0b38986a86fce704295eb0562d0a78e9ed5 --- /dev/null +++ b/resnet/8ul1b23e/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e7035ed88dfd6ea6df804108b15eb7a0f321fba453960f45508561f61b9b423 +size 49105345 diff --git a/resnet/8ul1b23e/cp_1001472000/cfg.json b/resnet/8ul1b23e/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c0a196c006ec2e9a7ea21fb48ff13db67b27860 --- /dev/null +++ b/resnet/8ul1b23e/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1001472000/model b/resnet/8ul1b23e/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..b31e3d97f0d01406404d2a19c1a53db99ce82fb0 --- /dev/null +++ b/resnet/8ul1b23e/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8116cfce32194cfcd8cab1acda0e8a1ecfa1200d447d1572871d17f987daad0 +size 49105345 diff --git a/resnet/8ul1b23e/cp_1101619200/cfg.json b/resnet/8ul1b23e/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19030bef24aca47fed3f5fe78279fa6e6d90a693 --- /dev/null +++ b/resnet/8ul1b23e/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1101619200/model b/resnet/8ul1b23e/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..b4aff9a38e606da01d83a0e0f1dc7fae3cdd85fa --- /dev/null +++ b/resnet/8ul1b23e/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494583055421c425af9016a6e7faa6a8ebcdfda46943776bd3c15b9982752d8f +size 49105345 diff --git a/resnet/8ul1b23e/cp_1201766400/cfg.json b/resnet/8ul1b23e/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dcce6005557cd56136c63b4e6d5701aebfba5e08 --- /dev/null +++ b/resnet/8ul1b23e/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1201766400/model b/resnet/8ul1b23e/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..d38c2310840c238209bff15f6bd2f49b76d96d89 --- /dev/null +++ b/resnet/8ul1b23e/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d295ee2f1950968445cdcc38dc3177beb617594d4a32297807c4d6b2e0f45a +size 49105345 diff --git a/resnet/8ul1b23e/cp_1301913600/cfg.json b/resnet/8ul1b23e/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1f9ed2f7733a12580943d1de83f4231e5fedd942 --- /dev/null +++ b/resnet/8ul1b23e/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1301913600/model b/resnet/8ul1b23e/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..c9a114c3e26805c988d6f21cd5e1e102990364ce --- /dev/null +++ b/resnet/8ul1b23e/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8617bfb7d12c39a2f266441db207091a947b736dedf783e65648bcf10b745b11 +size 49105345 diff --git a/resnet/8ul1b23e/cp_1402060800/cfg.json b/resnet/8ul1b23e/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..08b60e734b93217184d1b06e0f66fd7804996750 --- /dev/null +++ b/resnet/8ul1b23e/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1402060800/model b/resnet/8ul1b23e/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..6373560e75a223b2bb64a22577df0ff4912bce50 --- /dev/null +++ b/resnet/8ul1b23e/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8195713116d01d54ddd9e2298c6f222a70cfe25b12c9f3e5afd1fa610262ed1d +size 49105345 diff --git a/resnet/8ul1b23e/cp_1502208000/cfg.json b/resnet/8ul1b23e/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..13d7f589d04373ab00928c83302f457316a089d3 --- /dev/null +++ b/resnet/8ul1b23e/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1502208000/model b/resnet/8ul1b23e/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..b3c85ff69286365a6db9075788cc87c8e2f8366c --- /dev/null +++ b/resnet/8ul1b23e/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76392f9bede1419175f440290e8d990284f673acb9dd7ddcda6244ab12cd06d7 +size 49105345 diff --git a/resnet/8ul1b23e/cp_1602355200/cfg.json b/resnet/8ul1b23e/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..35359a3415158103b368411f83cae84a007a66be --- /dev/null +++ b/resnet/8ul1b23e/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1602355200/model b/resnet/8ul1b23e/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..5b2cb4fcafb18728ded06a1c8061a3d37ced4189 --- /dev/null +++ b/resnet/8ul1b23e/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3ba57db30afb33f5625eab00d0df1f25ac474af9708ae3cae3d6c53042aa38 +size 49105345 diff --git a/resnet/8ul1b23e/cp_1702502400/cfg.json b/resnet/8ul1b23e/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c5e2b2ec3474a91ea0726a1119688a3c5ad23d4c --- /dev/null +++ b/resnet/8ul1b23e/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1702502400/model b/resnet/8ul1b23e/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..dbe3e3ce55220ff8dc137547f29aa6691ce9569e --- /dev/null +++ b/resnet/8ul1b23e/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b22edfc4dbc0b84dcd2c280a07477b50018cb0dd43ec3e6317f1d0094edd1f1 +size 49105345 diff --git a/resnet/8ul1b23e/cp_1802649600/cfg.json b/resnet/8ul1b23e/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3396c34e601e008ed13f3cf58b89b0cd89159575 --- /dev/null +++ b/resnet/8ul1b23e/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1802649600/model b/resnet/8ul1b23e/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..d3c80fb28e59e9a327bc70b74399c7a7eb6e94e8 --- /dev/null +++ b/resnet/8ul1b23e/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f353ea4d87ce38610f1898b5671654c07ec6a72516caad46d1a7ac9d98c5362 +size 49105345 diff --git a/resnet/8ul1b23e/cp_1902796800/cfg.json b/resnet/8ul1b23e/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..86cd50ba84cba984602f3556eb6095318be9e3fa --- /dev/null +++ b/resnet/8ul1b23e/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_1902796800/model b/resnet/8ul1b23e/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..5d3d91c711be39bd8aac69152e42858335b827ec --- /dev/null +++ b/resnet/8ul1b23e/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac3d1984c2d53eb591e314b95f0918eab1d1c39eace29326d3d4c3eb7072ffa +size 49105345 diff --git a/resnet/8ul1b23e/cp_2002944000/cfg.json b/resnet/8ul1b23e/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a476227b3832aa5dc45dc1763f6f1d8446897b30 --- /dev/null +++ b/resnet/8ul1b23e/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 2106081856}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 938369233, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/resnet/8ul1b23e/cp_2002944000/model b/resnet/8ul1b23e/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..1a59209da959b8d96bf17fce07e7c3b12bda719c --- /dev/null +++ b/resnet/8ul1b23e/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa09adbe4fd02a48fde474c55ade9151ab0d0ad4c2670703dbedcf99f3fabec +size 49105345 diff --git a/resnet/syb50iz7/cp_0000998400/cfg.json b/resnet/syb50iz7/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dcaf1e877ed1882b6beb44871fd8cd1831816dee --- /dev/null +++ b/resnet/syb50iz7/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0000998400/model b/resnet/syb50iz7/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..c4f68cd4aa6197ccbc2fd4a704862899f0f3c948 --- /dev/null +++ b/resnet/syb50iz7/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c78a2e87b5cb83f3c1d2512070412628bd0ce74d2700a3be2fc43fb6615492 +size 49105345 diff --git a/resnet/syb50iz7/cp_0002001920/cfg.json b/resnet/syb50iz7/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b8c442e26208a5c8c27ae028ba1f88aca5f45da1 --- /dev/null +++ b/resnet/syb50iz7/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0002001920/model b/resnet/syb50iz7/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..d11e2e55f7ef4f3a8bb8dc67266260b33a302f5f --- /dev/null +++ b/resnet/syb50iz7/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4cc060591b07210d2fb2c72f6584041de0ea5f9e48b98873aaf319b03ee73b +size 49105345 diff --git a/resnet/syb50iz7/cp_0003000320/cfg.json b/resnet/syb50iz7/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4fa70e02f53cf91b2e23380e023acb88f6245565 --- /dev/null +++ b/resnet/syb50iz7/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0003000320/model b/resnet/syb50iz7/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..77e39c57623216548ad1182a15367e11b99922dd --- /dev/null +++ b/resnet/syb50iz7/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5756736636a4ca04d7f28309e33e1344390df9ed379f1a076fbb3ec550021fbc +size 49105345 diff --git a/resnet/syb50iz7/cp_0004003840/cfg.json b/resnet/syb50iz7/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e1e88439bb2087ca1d974a88342848fa20d0f229 --- /dev/null +++ b/resnet/syb50iz7/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0004003840/model b/resnet/syb50iz7/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..550e3bf61105c240454449fdaa506e74afa4007a --- /dev/null +++ b/resnet/syb50iz7/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23fb4e6a6eb5a81433379d5d1a2762ccccc7e8b4b119c94b6fd488ccde4e3d0 +size 49105345 diff --git a/resnet/syb50iz7/cp_0005007360/cfg.json b/resnet/syb50iz7/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..70d88167bfe3b7d8f051e40748416b06517bb9c6 --- /dev/null +++ b/resnet/syb50iz7/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0005007360/model b/resnet/syb50iz7/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..18a0928a78a4f60b2258f7a64e3150b053dbbb92 --- /dev/null +++ b/resnet/syb50iz7/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b6a51ca071009c7088ad551ad320298b3274a0ec41661e4ef2e126a0e2d32d +size 49105345 diff --git a/resnet/syb50iz7/cp_0006005760/cfg.json b/resnet/syb50iz7/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..218b31fc61b8f9fb509e1103ca69c9ce665259bb --- /dev/null +++ b/resnet/syb50iz7/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0006005760/model b/resnet/syb50iz7/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..966db3ef9a3abf5341ea35261a8767b311189081 --- /dev/null +++ b/resnet/syb50iz7/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f8968325234d3866e1bc804694cac2052f0aa091be8d83847ecd67333dbad0e +size 49105345 diff --git a/resnet/syb50iz7/cp_0007009280/cfg.json b/resnet/syb50iz7/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8a9fe018e34014f405019e5e71411371c69109a5 --- /dev/null +++ b/resnet/syb50iz7/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0007009280/model b/resnet/syb50iz7/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..e895862cbb642ac5e3fa52590457c4f3a4f2b1f6 --- /dev/null +++ b/resnet/syb50iz7/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f1fd3035f37abfb6ed6e6f88728d81bbb96f4e7a541a2dffe24596a57a5349 +size 49105345 diff --git a/resnet/syb50iz7/cp_0008007680/cfg.json b/resnet/syb50iz7/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..996d5a80166dc16d64c05a445f96f07ca83ae52f --- /dev/null +++ b/resnet/syb50iz7/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0008007680/model b/resnet/syb50iz7/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..22c0819b23189cc5c832eb397c7608e79d9a6823 --- /dev/null +++ b/resnet/syb50iz7/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac345566e88f185af07bba95182c7053791a8c8eae8bfd5d256a5c1be8faf7d +size 49105345 diff --git a/resnet/syb50iz7/cp_0009011200/cfg.json b/resnet/syb50iz7/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..316545f474277f729f2cb2c75c48ff397d9808fa --- /dev/null +++ b/resnet/syb50iz7/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0009011200/model b/resnet/syb50iz7/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..063e72a8ed4e01e05d2ce97f4ea1c096ca7dd0d6 --- /dev/null +++ b/resnet/syb50iz7/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a296fb52060e5c64536b3e1d0f91b9319a6f07b6c89d1769574356cffb327787 +size 49105345 diff --git a/resnet/syb50iz7/cp_0010014720/cfg.json b/resnet/syb50iz7/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..43591d68149973c5f52f4e54c936dc86884152e9 --- /dev/null +++ b/resnet/syb50iz7/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0010014720/model b/resnet/syb50iz7/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..c0304b8c253703c2842c260ff5277916ccf8d384 --- /dev/null +++ b/resnet/syb50iz7/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f4bc69865d3d14135bec2bbe5575bc98961c6ddd05c22d07935468eadd3760 +size 49105345 diff --git a/resnet/syb50iz7/cp_0011013120/cfg.json b/resnet/syb50iz7/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1b6f9d117c74547e9e170cff4c652fa1d377e652 --- /dev/null +++ b/resnet/syb50iz7/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0011013120/model b/resnet/syb50iz7/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..44383f680b065b4da0acdc4ae403b7088f843b37 --- /dev/null +++ b/resnet/syb50iz7/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5323c5653112763b507835c6719b5a6bb325720434b13ba9e83858ba30c056a6 +size 49105345 diff --git a/resnet/syb50iz7/cp_0012016640/cfg.json b/resnet/syb50iz7/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3c2e41bc0fde0f67ef1b288b435f9b197cbd42e8 --- /dev/null +++ b/resnet/syb50iz7/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0012016640/model b/resnet/syb50iz7/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..6309296e1506a2d6090f131756153c294ecd73a2 --- /dev/null +++ b/resnet/syb50iz7/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:953ba957a86718bd55917dc286b4b0bd77e006fb68d949672def8e0ab4c13b49 +size 49105345 diff --git a/resnet/syb50iz7/cp_0013015040/cfg.json b/resnet/syb50iz7/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..74e7536ebd24695a7a8d529ebfd6e8900e646c69 --- /dev/null +++ b/resnet/syb50iz7/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0013015040/model b/resnet/syb50iz7/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..ee9fe78d0185a307e31a004ede8fca03fc53ca3f --- /dev/null +++ b/resnet/syb50iz7/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b79991d58dd00547d513d02b868d843266f1166433bf977c7a7a66c015c5e89 +size 49105345 diff --git a/resnet/syb50iz7/cp_0014018560/cfg.json b/resnet/syb50iz7/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd4eccc8f58b6b79096ec3729dff4752807e0cd --- /dev/null +++ b/resnet/syb50iz7/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0014018560/model b/resnet/syb50iz7/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..9b5d54d42868d0a6fd1fe62d2439858461b939c3 --- /dev/null +++ b/resnet/syb50iz7/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fe5eb89f8a8f1a6d6473ae7a0c5f3ebe0d1f46dcdeb161e940a4a87900438e +size 49105345 diff --git a/resnet/syb50iz7/cp_0015022080/cfg.json b/resnet/syb50iz7/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bd3312a9c42a1d9343df63d341a36489a879dea3 --- /dev/null +++ b/resnet/syb50iz7/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0015022080/model b/resnet/syb50iz7/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..a36d87d5c1ae9cfb8871a4cc71090f22dde96f33 --- /dev/null +++ b/resnet/syb50iz7/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9c60134b6f6f4a63f29f14c43ee412f4a34a6d108540629e362a8abca4cd40 +size 49105345 diff --git a/resnet/syb50iz7/cp_0016020480/cfg.json b/resnet/syb50iz7/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dfd04fb050d6e18b242ec87aebd130ba6efb7cf4 --- /dev/null +++ b/resnet/syb50iz7/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0016020480/model b/resnet/syb50iz7/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..110b8c19acd0e8acdd91d9d9b54fce9505d581e1 --- /dev/null +++ b/resnet/syb50iz7/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83094bb53ced9f47dba25e17caea4a6d193d6e1046e8a02087638f6ccde26ded +size 49105345 diff --git a/resnet/syb50iz7/cp_0017024000/cfg.json b/resnet/syb50iz7/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c65ac909d739dd335081193cb902bf14c76ab236 --- /dev/null +++ b/resnet/syb50iz7/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0017024000/model b/resnet/syb50iz7/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..ae4d0e9927e57fa2f423c21fcf709a5ce887b342 --- /dev/null +++ b/resnet/syb50iz7/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9771abad9e76c2df82218a52d4d6df87b87e798ae0913bc19a47fc7b9975acd9 +size 49105345 diff --git a/resnet/syb50iz7/cp_0018022400/cfg.json b/resnet/syb50iz7/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fa6fa00b70da4a88797251d5b2f875869c19903d --- /dev/null +++ b/resnet/syb50iz7/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0018022400/model b/resnet/syb50iz7/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..6f8f6403cbac082187c1f4a3528edef80b08a606 --- /dev/null +++ b/resnet/syb50iz7/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed4e2016a8d8e60742fda7cfa5637d0dad2ca9e28b47bc493305e6b4e237ce1 +size 49105345 diff --git a/resnet/syb50iz7/cp_0019025920/cfg.json b/resnet/syb50iz7/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..193b0313a2ff55d350a0447ff15c14555ce8c121 --- /dev/null +++ b/resnet/syb50iz7/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0019025920/model b/resnet/syb50iz7/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..cd1915e30b5c9bdd154be043d1c4c2c3a44ca804 --- /dev/null +++ b/resnet/syb50iz7/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:015c8f5729b9406644dd891aa71d9eec9ddf724a150b77558543c5ea1ec940e7 +size 49105345 diff --git a/resnet/syb50iz7/cp_0020029440/cfg.json b/resnet/syb50iz7/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e75f3e473111c62465ebd755b8e38a22067c7163 --- /dev/null +++ b/resnet/syb50iz7/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0020029440/model b/resnet/syb50iz7/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..c0e0fafaa82fd78acd78ddb08661bb9b88a2e888 --- /dev/null +++ b/resnet/syb50iz7/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aacf54a114463a58ed070907b3f78ee0285b4bbf00ab2e56b93707b4bb076f83 +size 49105345 diff --git a/resnet/syb50iz7/cp_0030044160/cfg.json b/resnet/syb50iz7/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4731ded48708842d2ffd9cb7d5b64eb9ccf9d97e --- /dev/null +++ b/resnet/syb50iz7/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0030044160/model b/resnet/syb50iz7/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..12fa36ecb71b4fb768691a8c05a058e83770a2f3 --- /dev/null +++ b/resnet/syb50iz7/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f55a501bb6395992d7e13f7c9fe7c2154585fd84e31769adb3729000e56172 +size 49105345 diff --git a/resnet/syb50iz7/cp_0040058880/cfg.json b/resnet/syb50iz7/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3f2dfa37970c91b6537eecdb336d4731f4f974d5 --- /dev/null +++ b/resnet/syb50iz7/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0040058880/model b/resnet/syb50iz7/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..20496ab5188e9fbdadfb1a503f5fc0fe6e8ec0b4 --- /dev/null +++ b/resnet/syb50iz7/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a2b7f8b76769b3e63e2c3d39067c35b89ce0ccd9f58cbf86a130203523c03db +size 49105345 diff --git a/resnet/syb50iz7/cp_0050073600/cfg.json b/resnet/syb50iz7/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..09c0230b4a1fac5611b42e0bf513fd0925b1137d --- /dev/null +++ b/resnet/syb50iz7/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0050073600/model b/resnet/syb50iz7/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..3d8afb73c3a97692c3adab35904326c7e2bc400d --- /dev/null +++ b/resnet/syb50iz7/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9a33daceb20067a853e99ac57211fc7491d3c0aa0e6f12dffa841dcee1b9574 +size 49105345 diff --git a/resnet/syb50iz7/cp_0060088320/cfg.json b/resnet/syb50iz7/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3ca9c81aa5d207007782a0a93df6328a7d5652ce --- /dev/null +++ b/resnet/syb50iz7/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0060088320/model b/resnet/syb50iz7/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..d4d16fe847e1ad68f10a1eea7b087cf211ded57f --- /dev/null +++ b/resnet/syb50iz7/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6756cebcd665cc4031572875ae7fd56205c4ef4948fbead5c7316b5025789010 +size 49105345 diff --git a/resnet/syb50iz7/cp_0070103040/cfg.json b/resnet/syb50iz7/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d28ffa58f654a0b42f21ecf2fe8b2029d5dd704c --- /dev/null +++ b/resnet/syb50iz7/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0070103040/model b/resnet/syb50iz7/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..ccf7831c3e43ea80e42c7c5e5dce06d21c9a2b95 --- /dev/null +++ b/resnet/syb50iz7/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd6bfab46c17b08fcb0979a7cd3151b0c06553c3bc9af912bc3d07760788265 +size 49105345 diff --git a/resnet/syb50iz7/cp_0080117760/cfg.json b/resnet/syb50iz7/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..be81c027e6eed555e76db6832342a5af901d6f59 --- /dev/null +++ b/resnet/syb50iz7/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0080117760/model b/resnet/syb50iz7/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..e2756688db32fc11cee3c47e2575e85b19e46ae7 --- /dev/null +++ b/resnet/syb50iz7/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250c18cb902269e9b9ffb9d27b1feb156bc04612cf8d3f4cfa7371417a8d0ae4 +size 49105345 diff --git a/resnet/syb50iz7/cp_0090132480/cfg.json b/resnet/syb50iz7/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f83ddd617c4f3968da3366b95780caae42a176b3 --- /dev/null +++ b/resnet/syb50iz7/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0090132480/model b/resnet/syb50iz7/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..03fe2dd79bcdc3f56d0d60e9cbf4b828706e796a --- /dev/null +++ b/resnet/syb50iz7/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7f5ebdc708ac47709a532e0ef37b1aa3aeb2c869131edbbfa92eae08279f8d +size 49105345 diff --git a/resnet/syb50iz7/cp_0100147200/cfg.json b/resnet/syb50iz7/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b20dfbe448c48145c752e68722fe0a0b62caa7 --- /dev/null +++ b/resnet/syb50iz7/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0100147200/model b/resnet/syb50iz7/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..e4fe41cd2b58c3178c73aef2e04c7a48cf8ad2bb --- /dev/null +++ b/resnet/syb50iz7/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06533dd993d78ba406090f284f13eef7e6b60a7a94de2b0857bcf6103b903282 +size 49105345 diff --git a/resnet/syb50iz7/cp_0110161920/cfg.json b/resnet/syb50iz7/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b0c903fd74c8dbcfd684f2a2878924ed586ca465 --- /dev/null +++ b/resnet/syb50iz7/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0110161920/model b/resnet/syb50iz7/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..3b16ba46b504f39ec395ccb2d4d59fead0654dd2 --- /dev/null +++ b/resnet/syb50iz7/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:254adb88653b4cf29dac33dbfe7fa199a023fa346521967ec8fbe29f57e3e78a +size 49105345 diff --git a/resnet/syb50iz7/cp_0120176640/cfg.json b/resnet/syb50iz7/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d0a517d63ea8cfae547eb7d4e1bb171120da4ce6 --- /dev/null +++ b/resnet/syb50iz7/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0120176640/model b/resnet/syb50iz7/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..6ffc7b679b362d4dc8df3b0afcd240e9a4b10790 --- /dev/null +++ b/resnet/syb50iz7/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be15ef2756b932cc240f85307de2f11ae1b607360e2386250651e2b91e9ec23f +size 49105345 diff --git a/resnet/syb50iz7/cp_0130191360/cfg.json b/resnet/syb50iz7/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..267b6ccf3764a4142d12e42c89cfbd3a276962fd --- /dev/null +++ b/resnet/syb50iz7/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0130191360/model b/resnet/syb50iz7/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..a25062852c5c4090733f9905a8f3811735c86582 --- /dev/null +++ b/resnet/syb50iz7/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86695c62115ec1481cc613f6c8efec9d97e6fe36d21e5e3a9dc04acbbf94b4bd +size 49105345 diff --git a/resnet/syb50iz7/cp_0140206080/cfg.json b/resnet/syb50iz7/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fff281fdc43cad8e3e2636a34209ebd5e7d0a2c7 --- /dev/null +++ b/resnet/syb50iz7/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0140206080/model b/resnet/syb50iz7/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..599a2434455c19785485b8181c335aa2775d1c2a --- /dev/null +++ b/resnet/syb50iz7/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a501487c74148461d02d79d3c6c70e12ee8672870fc4004e83fea88880a57d +size 49105345 diff --git a/resnet/syb50iz7/cp_0150220800/cfg.json b/resnet/syb50iz7/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..78f0036985cda92542a6553092c0d63477d17f89 --- /dev/null +++ b/resnet/syb50iz7/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0150220800/model b/resnet/syb50iz7/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..350d93b58fc932a3186ec1ac39acfc08517275bd --- /dev/null +++ b/resnet/syb50iz7/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cf5138d73b6b384fef657d36196c998c5607bbca096fe5ab80dc713787bc3d6 +size 49105345 diff --git a/resnet/syb50iz7/cp_0160235520/cfg.json b/resnet/syb50iz7/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2597b2b31465223a592cc2a21d2e758e36bdd692 --- /dev/null +++ b/resnet/syb50iz7/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0160235520/model b/resnet/syb50iz7/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..2ece32d3148949534b8c3f5223e7231bcec68ab0 --- /dev/null +++ b/resnet/syb50iz7/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ee6dbc4ad191783b6a97adf87b8523dd08f3c30b2ea21a314b7ab2a316d806 +size 49105345 diff --git a/resnet/syb50iz7/cp_0170250240/cfg.json b/resnet/syb50iz7/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9348b3bb49de83d8ee1766fca246b57ff7820ed7 --- /dev/null +++ b/resnet/syb50iz7/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0170250240/model b/resnet/syb50iz7/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..734479d7df5b94c159a1ebd0bc18d74b46c4f168 --- /dev/null +++ b/resnet/syb50iz7/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b96b8afd364f45fb3da26a9c983bc61720636aa805dbfc67f559a4e98cf1982 +size 49105345 diff --git a/resnet/syb50iz7/cp_0180264960/cfg.json b/resnet/syb50iz7/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f246c34d32395c731b198e6a41b84a5401e7e74f --- /dev/null +++ b/resnet/syb50iz7/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0180264960/model b/resnet/syb50iz7/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..65097f0dc2be8b9afae37ff553cf7e4d9c99b181 --- /dev/null +++ b/resnet/syb50iz7/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14838f2da7877ef325cdb72f9b8c175ad1f144a8d8f6b90ce7ada8482d452f8e +size 49105345 diff --git a/resnet/syb50iz7/cp_0190279680/cfg.json b/resnet/syb50iz7/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..54e1f8846d68594a7638191b1f301251d5e7f01a --- /dev/null +++ b/resnet/syb50iz7/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0190279680/model b/resnet/syb50iz7/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..c5753ab75069227d9ef9fb208b14a30311a45051 --- /dev/null +++ b/resnet/syb50iz7/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7164e9ff5ce0bb7951d8d79dd62c3430cadaa3bf17344bc354b80985492a1e7 +size 49105345 diff --git a/resnet/syb50iz7/cp_0200294400/cfg.json b/resnet/syb50iz7/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2896ae46919b90b726508f89519c59c408c3aafc --- /dev/null +++ b/resnet/syb50iz7/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0200294400/model b/resnet/syb50iz7/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..1e2c6990cef0a4de822d2d711795b1fcd5ea41e5 --- /dev/null +++ b/resnet/syb50iz7/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3fc201f59e9ad51bd90e9aef3930bf819b081ab3bfbf8e57f864dc20c88862 +size 49105345 diff --git a/resnet/syb50iz7/cp_0300441600/cfg.json b/resnet/syb50iz7/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f2cbabf05c31135e21b30b3e67514e1a34fea299 --- /dev/null +++ b/resnet/syb50iz7/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0300441600/model b/resnet/syb50iz7/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..b1cc9c10ce67b4c3006d763ac80a82fe29d64de4 --- /dev/null +++ b/resnet/syb50iz7/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e393bdbf99515ca2c10d2a52445d7b525093c2ccb7b926231a306740388c24 +size 49105345 diff --git a/resnet/syb50iz7/cp_0400588800/cfg.json b/resnet/syb50iz7/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b4a94ecca5d8963c0c5dd2fcad62068f29a8ad2d --- /dev/null +++ b/resnet/syb50iz7/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0400588800/model b/resnet/syb50iz7/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..4f32ae7bc5a8c8be9f77bd01c6b7c3954bf5eef2 --- /dev/null +++ b/resnet/syb50iz7/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aea69a3b5749c8af8fff83e9c52c4ba8f2b162432af5a7a3d3944599dff8ce5f +size 49105345 diff --git a/resnet/syb50iz7/cp_0500736000/cfg.json b/resnet/syb50iz7/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a1441b25864ba00c4ea03d52ec1f280f8417f8c6 --- /dev/null +++ b/resnet/syb50iz7/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0500736000/model b/resnet/syb50iz7/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..1924e74cc704344a725d1131d19ceb47484659f3 --- /dev/null +++ b/resnet/syb50iz7/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:830985145cc3080de9507ad2e8d7e4fa225dc23ab82f0c1003f62d6ce0c83b5b +size 49105345 diff --git a/resnet/syb50iz7/cp_0600883200/cfg.json b/resnet/syb50iz7/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..91c1612769d9f53f0a478d594552694b71f6cc37 --- /dev/null +++ b/resnet/syb50iz7/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0600883200/model b/resnet/syb50iz7/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..d0efbe98250abe40096095040a754259407fe803 --- /dev/null +++ b/resnet/syb50iz7/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd8ba901aaf488e78e1db0138814e3ad53271c9c75fa1195d62b240ae4be841 +size 49105345 diff --git a/resnet/syb50iz7/cp_0701030400/cfg.json b/resnet/syb50iz7/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4364911cc44457c9f67d5f9d48ffb753b8c8c53c --- /dev/null +++ b/resnet/syb50iz7/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0701030400/model b/resnet/syb50iz7/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..94dbc4e50904955fd859a0526eda1e538b29fe2f --- /dev/null +++ b/resnet/syb50iz7/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0786016e41d0698dc3cfaf03b82063d1509de57e4bb6c3d17afa540580a66c4 +size 49105345 diff --git a/resnet/syb50iz7/cp_0801177600/cfg.json b/resnet/syb50iz7/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a67f8da63852ccf44911d339da4db23f5525a0c2 --- /dev/null +++ b/resnet/syb50iz7/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0801177600/model b/resnet/syb50iz7/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..37a65d566b9acfe64eff4827afbab781ad4f0f21 --- /dev/null +++ b/resnet/syb50iz7/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:273fafe786b1705e65eab0eebf9ab6cd753f257de9ecc666ab3087b116089bdf +size 49105345 diff --git a/resnet/syb50iz7/cp_0901324800/cfg.json b/resnet/syb50iz7/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ada93337ae483c36925a0c62d1eb63e784df27d0 --- /dev/null +++ b/resnet/syb50iz7/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_0901324800/model b/resnet/syb50iz7/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..969ca32e717ceee145b2123485895de5aba54abf --- /dev/null +++ b/resnet/syb50iz7/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73d972daf9e619127cbd2c83091ab1ff4311ce36e03b74f87fcfbf3567856de0 +size 49105345 diff --git a/resnet/syb50iz7/cp_1001472000/cfg.json b/resnet/syb50iz7/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..fc51dad8aff86f59586878d625e0aae50c7d5dfb --- /dev/null +++ b/resnet/syb50iz7/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1001472000/model b/resnet/syb50iz7/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..c65408ab1f205e7a8337a1a8ed8e55185038e858 --- /dev/null +++ b/resnet/syb50iz7/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4d0e7a8e04e7302f2443a8208da30e9ce6585e60039d78159e6ff490a550149 +size 49105345 diff --git a/resnet/syb50iz7/cp_1101619200/cfg.json b/resnet/syb50iz7/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1a57b9b37723d0f0ab9f6d252e3c88927023e825 --- /dev/null +++ b/resnet/syb50iz7/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1101619200/model b/resnet/syb50iz7/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..ed5f6fcba7848b0fc0508e93e9553787f5fa0666 --- /dev/null +++ b/resnet/syb50iz7/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ae49864fa2418ca4adb6e75e4ef3c4d4611710f17180ccbbedc11d4106f486 +size 49105345 diff --git a/resnet/syb50iz7/cp_1201766400/cfg.json b/resnet/syb50iz7/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4caf4f978516fa5ff23a752aa4d22d36fb464e58 --- /dev/null +++ b/resnet/syb50iz7/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1201766400/model b/resnet/syb50iz7/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..dba1222f513d8e90cb1a1d5e9dc653c1d9f254f6 --- /dev/null +++ b/resnet/syb50iz7/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab29679a8a71a2ddb94677d62981efce1a28cb52a011a91ebdb65f022b7fd97b +size 49105345 diff --git a/resnet/syb50iz7/cp_1301913600/cfg.json b/resnet/syb50iz7/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..38f4d2f61d97acc917b9e743261e15c94fcfe418 --- /dev/null +++ b/resnet/syb50iz7/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1301913600/model b/resnet/syb50iz7/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..3df83c4ae63df6691aef5eea7910f44153359c27 --- /dev/null +++ b/resnet/syb50iz7/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff48100a08fc636e046b47da5e52bbadf5811269e1fa216a0f7c10d130e4a789 +size 49105345 diff --git a/resnet/syb50iz7/cp_1402060800/cfg.json b/resnet/syb50iz7/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d923bff92b711df135eff52339718c832f614076 --- /dev/null +++ b/resnet/syb50iz7/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1402060800/model b/resnet/syb50iz7/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..59fdd999241a7aec4ea08330def848955d055ec1 --- /dev/null +++ b/resnet/syb50iz7/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db95b0e14f9ec68b8e71bc9b9de188242831be19f1cb29160a36d66d8fbc5556 +size 49105345 diff --git a/resnet/syb50iz7/cp_1502208000/cfg.json b/resnet/syb50iz7/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..df78ebe01e2b5fe33c11ed1c0d649241e332599a --- /dev/null +++ b/resnet/syb50iz7/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1502208000/model b/resnet/syb50iz7/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..b4cff7c2cb4c3701ac79ba7127363876cebd1052 --- /dev/null +++ b/resnet/syb50iz7/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c01c05ee3cfe8c84121b3095e30027365c2decbe753477c9bfcf5f01ccf86ed +size 49105345 diff --git a/resnet/syb50iz7/cp_1602355200/cfg.json b/resnet/syb50iz7/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..362e99cdcedbd2b7f7a361639f564e8e59f76a63 --- /dev/null +++ b/resnet/syb50iz7/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1602355200/model b/resnet/syb50iz7/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..b885533f7b747b4d736c1a1d2e00d406a5bd90b9 --- /dev/null +++ b/resnet/syb50iz7/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2722f195072510f41733de32e14a3b9d2321471db1f0dc9a152ef7dc21a3410e +size 49105345 diff --git a/resnet/syb50iz7/cp_1702502400/cfg.json b/resnet/syb50iz7/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e14b173b141e5148eff6fb16405ed2e1205aa16b --- /dev/null +++ b/resnet/syb50iz7/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1702502400/model b/resnet/syb50iz7/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..458feffc2272cde5db9cda7418b7f1593be64ff0 --- /dev/null +++ b/resnet/syb50iz7/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a891e3100df30c380ced8a99fac992932f3e2d68119772c0be58d2883daf2fdc +size 49105345 diff --git a/resnet/syb50iz7/cp_1802649600/cfg.json b/resnet/syb50iz7/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e79382ce85fc11e26373cebab2d6c7e567e1b904 --- /dev/null +++ b/resnet/syb50iz7/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1802649600/model b/resnet/syb50iz7/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..4262d873e94c0e335478d89ee90144122e0858c7 --- /dev/null +++ b/resnet/syb50iz7/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1615118941fb6a99cbd0988e0d5c140441a333f02807838318a0c5725a3defb4 +size 49105345 diff --git a/resnet/syb50iz7/cp_1902796800/cfg.json b/resnet/syb50iz7/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c692fa722ba0b73f8e483047e4e70e70fee79954 --- /dev/null +++ b/resnet/syb50iz7/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_1902796800/model b/resnet/syb50iz7/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..e2c75ba814e4873304648dac4585de3e5bd304a2 --- /dev/null +++ b/resnet/syb50iz7/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d462cfca4fcc4ebed7e4e07d4e6bbff277daa3f2985d5433a2f8993f40417bf +size 49105345 diff --git a/resnet/syb50iz7/cp_2002944000/cfg.json b/resnet/syb50iz7/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8ecba53fee53c4aa7d6dbeff2610ea77c7ff1450 --- /dev/null +++ b/resnet/syb50iz7/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 247805381}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 901457989, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/resnet/syb50iz7/cp_2002944000/model b/resnet/syb50iz7/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..06327f4a02a71f1fcd790b438c3b71d34416e4d6 --- /dev/null +++ b/resnet/syb50iz7/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c31cdf5b99eb119abb4d12c798d785c7e0e7efdae14e823f9721fdda35499c +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0000998400/cfg.json b/resnet/zgyp3v0o/cp_0000998400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4af41515d7852a617493ff05a6b7daa8eb025fbf --- /dev/null +++ b/resnet/zgyp3v0o/cp_0000998400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0000998400/model b/resnet/zgyp3v0o/cp_0000998400/model new file mode 100644 index 0000000000000000000000000000000000000000..0328471ce405f9f00acc50de395e666f345e3574 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0000998400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e204522a680815f1989252601ebbeb8e2afe199cc576f6c0812ee831e5ac41 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0002001920/cfg.json b/resnet/zgyp3v0o/cp_0002001920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9c930124c00139d8f8e5a3991174e9e837496cbe --- /dev/null +++ b/resnet/zgyp3v0o/cp_0002001920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0002001920/model b/resnet/zgyp3v0o/cp_0002001920/model new file mode 100644 index 0000000000000000000000000000000000000000..1b0706dd2d54934eee89be5ebd764d402cd1c8c6 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0002001920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e029bf5a0930e8c4a91da582bb1aa2e2df6fa3dbe95c7a9c57577a364eca9c7 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0003000320/cfg.json b/resnet/zgyp3v0o/cp_0003000320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..586c5e86110e8b7b38d476152dbc3f528ce8859e --- /dev/null +++ b/resnet/zgyp3v0o/cp_0003000320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 586} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0003000320/model b/resnet/zgyp3v0o/cp_0003000320/model new file mode 100644 index 0000000000000000000000000000000000000000..a69eb7fc98c4d182167fdb465da62f44f0cb5768 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0003000320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:affdf6acb95312c00b98127b1c12544668ef5e32bcc5452116c847e20f68d5a9 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0004003840/cfg.json b/resnet/zgyp3v0o/cp_0004003840/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..78742cf36e20e92780c366b47eeca1dfe049a48e --- /dev/null +++ b/resnet/zgyp3v0o/cp_0004003840/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 782} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0004003840/model b/resnet/zgyp3v0o/cp_0004003840/model new file mode 100644 index 0000000000000000000000000000000000000000..bb8a3bb9092e494fdfc90ab704223aedf4205f91 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0004003840/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afba4f64242b6714615fa5192c2c4ccba589d1cb4f829bc6e6c2818c35c080b5 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0005007360/cfg.json b/resnet/zgyp3v0o/cp_0005007360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b61248cfe33bd64a4107bf1c56597c506cb7eb55 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0005007360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 978} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0005007360/model b/resnet/zgyp3v0o/cp_0005007360/model new file mode 100644 index 0000000000000000000000000000000000000000..0210dd6c209f7dca936f5e1244684da49403b537 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0005007360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaabd88f3fd41181bcc2573b397a14cc4af9c009da2a4cdfe690f043734a6975 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0006005760/cfg.json b/resnet/zgyp3v0o/cp_0006005760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1399c533798685c0b0acdd19cb150d64fea2825c --- /dev/null +++ b/resnet/zgyp3v0o/cp_0006005760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1173} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0006005760/model b/resnet/zgyp3v0o/cp_0006005760/model new file mode 100644 index 0000000000000000000000000000000000000000..cc202a11d26a6f99aee6ff73c6db45180db21e9f --- /dev/null +++ b/resnet/zgyp3v0o/cp_0006005760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:debe790937ee950079cb176f2a5ef1ed7902299397da4d87a52497c5c183c091 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0007009280/cfg.json b/resnet/zgyp3v0o/cp_0007009280/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e604d9f262bc2cac45160e2b851ab4522096845a --- /dev/null +++ b/resnet/zgyp3v0o/cp_0007009280/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1369} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0007009280/model b/resnet/zgyp3v0o/cp_0007009280/model new file mode 100644 index 0000000000000000000000000000000000000000..5e7cdbad89bc71bd29eb17662ef2b520d497370c --- /dev/null +++ b/resnet/zgyp3v0o/cp_0007009280/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3559e7c948ee59253ab91128b1012e83ae3f1495fbc630293ea3213d94ff1274 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0008007680/cfg.json b/resnet/zgyp3v0o/cp_0008007680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..f6551785352a7dd811a876839be097a7d2ef3824 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0008007680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1564} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0008007680/model b/resnet/zgyp3v0o/cp_0008007680/model new file mode 100644 index 0000000000000000000000000000000000000000..70b41c3ad9f9147345d5f4723b26dada1db78b43 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0008007680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e5d5c3bd8fc60835305617929a6b326e41e83c0611dffc82c894563c1f625f +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0009011200/cfg.json b/resnet/zgyp3v0o/cp_0009011200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4290ba49d6d19064a37448bda5fcecee3a616869 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0009011200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1760} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0009011200/model b/resnet/zgyp3v0o/cp_0009011200/model new file mode 100644 index 0000000000000000000000000000000000000000..5973123a77559e73346c78d4e708b8eef616fec7 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0009011200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39fa04db3ab8bc67ee69edf452c1f0ab8eda0cf021599ab9ab76fe91a01bd4df +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0010014720/cfg.json b/resnet/zgyp3v0o/cp_0010014720/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..708264e004700609f0230a576b32d77a450beb74 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0010014720/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 1956} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0010014720/model b/resnet/zgyp3v0o/cp_0010014720/model new file mode 100644 index 0000000000000000000000000000000000000000..807722907e3c24e9f30c22b85e82ba0c691c6782 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0010014720/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:776a48a1da22f57c447a7d4b55dffdab0b52b945d5241abecd1c03280cca8521 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0011013120/cfg.json b/resnet/zgyp3v0o/cp_0011013120/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4726c3bef5fd2b057035bae24957252bd7e937b1 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0011013120/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2151} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0011013120/model b/resnet/zgyp3v0o/cp_0011013120/model new file mode 100644 index 0000000000000000000000000000000000000000..9e1fa10686648ad2aecfd7b2a0223a7f1725a311 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0011013120/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607672cffd577505c868ea9d887bbee0c73ffbabe4b34d4818c9aac5df3532e8 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0012016640/cfg.json b/resnet/zgyp3v0o/cp_0012016640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9a5b784f509c8f4c48e110ee267fdddff49342bd --- /dev/null +++ b/resnet/zgyp3v0o/cp_0012016640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2347} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0012016640/model b/resnet/zgyp3v0o/cp_0012016640/model new file mode 100644 index 0000000000000000000000000000000000000000..e5dd70282ff73bfae381af62560966daf55825e0 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0012016640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1b1127b9f617c35c893ff91ca691875daa7cd18c873c56d681ac65175ed3ae +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0013015040/cfg.json b/resnet/zgyp3v0o/cp_0013015040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ba6a4e71e8823ad9cb7b14846b754fcc65d889c7 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0013015040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2542} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0013015040/model b/resnet/zgyp3v0o/cp_0013015040/model new file mode 100644 index 0000000000000000000000000000000000000000..99ec80082d8d519a1a9fa3c9bc01c971ff4d6e6c --- /dev/null +++ b/resnet/zgyp3v0o/cp_0013015040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01bfb29f605bdfabefe5eb605b43bd7755ae39dda7566582b12193fd3e5c1615 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0014018560/cfg.json b/resnet/zgyp3v0o/cp_0014018560/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3ce0c086aca862e873784d70cf12a40e4f512761 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0014018560/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2738} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0014018560/model b/resnet/zgyp3v0o/cp_0014018560/model new file mode 100644 index 0000000000000000000000000000000000000000..3510005be51c9f76c6f2b4fafb224b4781e27b64 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0014018560/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c8070a18f3968b8f4abbcae6fe5308b2bf35396ee4a02732b496b842d05d0a1 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0015022080/cfg.json b/resnet/zgyp3v0o/cp_0015022080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..dca04865a69b548f3e1793f441df059b966f5054 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0015022080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 2934} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0015022080/model b/resnet/zgyp3v0o/cp_0015022080/model new file mode 100644 index 0000000000000000000000000000000000000000..2cf5dc773c2d73f27d83d5182e2797882f110fed --- /dev/null +++ b/resnet/zgyp3v0o/cp_0015022080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c7381b67d998421d9cdada3f35adacc50f1ae8a74ec639d4f9ce7d2e09c2ec +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0016020480/cfg.json b/resnet/zgyp3v0o/cp_0016020480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8d371e8ca090db101cbdb8d7f0af79748345ca13 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0016020480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3129} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0016020480/model b/resnet/zgyp3v0o/cp_0016020480/model new file mode 100644 index 0000000000000000000000000000000000000000..146f60053347fe46e84967fb16f7a9a6bc6be19c --- /dev/null +++ b/resnet/zgyp3v0o/cp_0016020480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91c9bc9b05da7eed7b739743aec9cf4612d64b5d3087ff1b3f92bb047eac0ca +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0017024000/cfg.json b/resnet/zgyp3v0o/cp_0017024000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3971501563ea220435f082c43bb121065ae3906c --- /dev/null +++ b/resnet/zgyp3v0o/cp_0017024000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3325} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0017024000/model b/resnet/zgyp3v0o/cp_0017024000/model new file mode 100644 index 0000000000000000000000000000000000000000..4d06a0074f19f94451354fa00e8449e84c1306a9 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0017024000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55418fa0120b19a159f6f5fa6461bd59da01a65fb9352ef3b150d08fe6e43aef +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0018022400/cfg.json b/resnet/zgyp3v0o/cp_0018022400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2074d61c9982921ef52029cfb43e77329287a94e --- /dev/null +++ b/resnet/zgyp3v0o/cp_0018022400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3520} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0018022400/model b/resnet/zgyp3v0o/cp_0018022400/model new file mode 100644 index 0000000000000000000000000000000000000000..88eda7b8e51b2e16c28a667e7288b04ec7cc3d79 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0018022400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc37fbbf460ef70f17d21cf1b3fa45c0d851775baa1eec039d2c4cc02d255fb +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0019025920/cfg.json b/resnet/zgyp3v0o/cp_0019025920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..8bbdd534553990de2e2700f0f3dd9e26d8ecc951 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0019025920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3716} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0019025920/model b/resnet/zgyp3v0o/cp_0019025920/model new file mode 100644 index 0000000000000000000000000000000000000000..65f4c532bfa97a143ff2a007d2f5a92da8410fbf --- /dev/null +++ b/resnet/zgyp3v0o/cp_0019025920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78c9ba29973e6ad6524681b6c4fafca1033bb3a76ff474361cf5ae4b1826ded +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0020029440/cfg.json b/resnet/zgyp3v0o/cp_0020029440/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6460f83c009852e099df66cc0b8f1c937c9be321 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0020029440/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 3912} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0020029440/model b/resnet/zgyp3v0o/cp_0020029440/model new file mode 100644 index 0000000000000000000000000000000000000000..7d734f406de2dae455b4b225e7e52513fbf56c9c --- /dev/null +++ b/resnet/zgyp3v0o/cp_0020029440/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca9c8f42eb391cc70b358cf1e86f9dfd0c0661b52cb6e7f84d397393d6c0c66 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0030044160/cfg.json b/resnet/zgyp3v0o/cp_0030044160/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1eb5f7313e7bea7b25fcebcd211884de348127c4 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0030044160/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 5868} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0030044160/model b/resnet/zgyp3v0o/cp_0030044160/model new file mode 100644 index 0000000000000000000000000000000000000000..07a824ed6f88b40f10147a9c017d603469a88e48 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0030044160/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3169c3e7813410bef9d22c1cd3a783251f5be1b96a7f970bc888a89fad594fd +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0040058880/cfg.json b/resnet/zgyp3v0o/cp_0040058880/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4dbb0223920fcda69d00b0f7c4b7e33cbb698ed4 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0040058880/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 7824} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0040058880/model b/resnet/zgyp3v0o/cp_0040058880/model new file mode 100644 index 0000000000000000000000000000000000000000..7f5c3bb214b0266cd41d0a97263d0710835f3154 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0040058880/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7230491f6e23f929c461492d08add1909716f09b6d5a7b98f01da617dd025531 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0050073600/cfg.json b/resnet/zgyp3v0o/cp_0050073600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..347e59a41207ca6938f6f5d2070a4ff1bd3329e7 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0050073600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 9780} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0050073600/model b/resnet/zgyp3v0o/cp_0050073600/model new file mode 100644 index 0000000000000000000000000000000000000000..0844a1a8fdadff61657fb22aaa57b7b3e1ab3524 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0050073600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:960d3337ac2b3ea5b93cc8819c985eb616acfdf4c28831a625d9c1a6855fa901 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0060088320/cfg.json b/resnet/zgyp3v0o/cp_0060088320/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..75110e2055247bece6cc008b5249c930aebccc59 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0060088320/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 11736} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0060088320/model b/resnet/zgyp3v0o/cp_0060088320/model new file mode 100644 index 0000000000000000000000000000000000000000..a76923fe06e7a3bee7fabe0d6cc3ca70e3b8031b --- /dev/null +++ b/resnet/zgyp3v0o/cp_0060088320/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34dc3d92f0cc52c4350fb29873e3226b58c5c9cc81b5860b3c6e1db7ed7a3156 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0070103040/cfg.json b/resnet/zgyp3v0o/cp_0070103040/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..771d3b0d0e1adca3194873703ed6488d544940c9 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0070103040/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 13692} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0070103040/model b/resnet/zgyp3v0o/cp_0070103040/model new file mode 100644 index 0000000000000000000000000000000000000000..873f64b538509d0c2a7e8627970867480f7b0a59 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0070103040/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c325628b749eed917634e63a17a8f4b21eaf869fa217203c2bb6c76277bd1bd2 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0080117760/cfg.json b/resnet/zgyp3v0o/cp_0080117760/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..3dcede2482c3620450bc0d2b0dccfabf582b90ea --- /dev/null +++ b/resnet/zgyp3v0o/cp_0080117760/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 15648} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0080117760/model b/resnet/zgyp3v0o/cp_0080117760/model new file mode 100644 index 0000000000000000000000000000000000000000..b93190f6ca7ac26a85803f44a4ee857bbd2db1d3 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0080117760/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe313eb18805b611d4b8f67729a4b50dd845c744487d1c3e20d171ca64c901c3 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0090132480/cfg.json b/resnet/zgyp3v0o/cp_0090132480/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d93aeba59dc5b491ca846b725088a311d855b02b --- /dev/null +++ b/resnet/zgyp3v0o/cp_0090132480/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 17604} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0090132480/model b/resnet/zgyp3v0o/cp_0090132480/model new file mode 100644 index 0000000000000000000000000000000000000000..692dcd3a84fa816bd35ca35846727f6ca7692ec6 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0090132480/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854f75d5bae4dcbedd740c24402748f0f36e5d1a425ebdd7f53dc034c4bcab65 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0100147200/cfg.json b/resnet/zgyp3v0o/cp_0100147200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..425a76d06da15130575e6d3ffc512375e150e250 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0100147200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 19560} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0100147200/model b/resnet/zgyp3v0o/cp_0100147200/model new file mode 100644 index 0000000000000000000000000000000000000000..6dbc9a08ae27f14b620098a8e8cbc1c529692237 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0100147200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7e746c45f915fdaf2344c7728249423ac457d8917e2d4771e38f0a52b9b97fd +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0110161920/cfg.json b/resnet/zgyp3v0o/cp_0110161920/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c6eefdd52e0ae73f27ec00641450199e58ac1af5 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0110161920/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 21516} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0110161920/model b/resnet/zgyp3v0o/cp_0110161920/model new file mode 100644 index 0000000000000000000000000000000000000000..71f9a78560c310765cfa25bdfc07605f84937bae --- /dev/null +++ b/resnet/zgyp3v0o/cp_0110161920/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e02090536e2e7969fe383e7272d5cfd01f68b54b119ec4d2bdaa166e1eab1a65 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0120176640/cfg.json b/resnet/zgyp3v0o/cp_0120176640/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..16a2e18bcfcfcf12dc66fad73005525c5ccfd693 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0120176640/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 23472} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0120176640/model b/resnet/zgyp3v0o/cp_0120176640/model new file mode 100644 index 0000000000000000000000000000000000000000..0c8ec36f4be892cc08a2645d873d933fb9f86cb1 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0120176640/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e59eb0d4a91021cdee22028e7bb21c1f7956346e155d2992a64cd76780c6e6 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0130191360/cfg.json b/resnet/zgyp3v0o/cp_0130191360/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7ae07ce508821fecc327e7529ae943eaca973cce --- /dev/null +++ b/resnet/zgyp3v0o/cp_0130191360/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 25428} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0130191360/model b/resnet/zgyp3v0o/cp_0130191360/model new file mode 100644 index 0000000000000000000000000000000000000000..40af53e9376953b6ee0c75abd2df852c8d881b7c --- /dev/null +++ b/resnet/zgyp3v0o/cp_0130191360/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b617938a496f76d4f10aacd8ba2747a1d376f063e1a9714b522445e1784954 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0140206080/cfg.json b/resnet/zgyp3v0o/cp_0140206080/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1f750dfbbba15965b2cfeb77c546b77314fad519 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0140206080/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 27384} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0140206080/model b/resnet/zgyp3v0o/cp_0140206080/model new file mode 100644 index 0000000000000000000000000000000000000000..8fbe6c015f865cff6a5f07e07b7644fce9c99421 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0140206080/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2473803a2f25fe4d24da922ff845e7cb7ce1978e0460efbc69ed8779e1daa59d +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0150220800/cfg.json b/resnet/zgyp3v0o/cp_0150220800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77899406e49dc9738f357a41008fc96b7f2e0908 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0150220800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 29340} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0150220800/model b/resnet/zgyp3v0o/cp_0150220800/model new file mode 100644 index 0000000000000000000000000000000000000000..5586b193281cffeceabce90365cd9cd62886dc31 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0150220800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccff20827c91a0b977801223bc9ebbd34565ff22250a09034fbb12123a284267 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0160235520/cfg.json b/resnet/zgyp3v0o/cp_0160235520/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..128bf2b97c20ac66c835a5c3b773cffa9345f9e3 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0160235520/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 31296} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0160235520/model b/resnet/zgyp3v0o/cp_0160235520/model new file mode 100644 index 0000000000000000000000000000000000000000..414ff49857463a9e88bb8e29484f8026a455ead0 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0160235520/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e72288b7c7e03cbd2933fe95fcf4740cc04309badc07cb3d31b9247f915fd8 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0170250240/cfg.json b/resnet/zgyp3v0o/cp_0170250240/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..5cce66e4e1d7f8633fa04b258d49b5399289bf22 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0170250240/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 33252} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0170250240/model b/resnet/zgyp3v0o/cp_0170250240/model new file mode 100644 index 0000000000000000000000000000000000000000..81e1f0bdbe397a3e8fb4ecd1a25454770c61a3be --- /dev/null +++ b/resnet/zgyp3v0o/cp_0170250240/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348a4628d7b9e6642f508f7dafe3f2e095547ba6047ca89c151cba4851593dc7 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0180264960/cfg.json b/resnet/zgyp3v0o/cp_0180264960/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..568c98c2847bbd2a238396477bed145d35e776f1 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0180264960/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 35208} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0180264960/model b/resnet/zgyp3v0o/cp_0180264960/model new file mode 100644 index 0000000000000000000000000000000000000000..b5bcc63d4137574054412b78aeb004c98531183c --- /dev/null +++ b/resnet/zgyp3v0o/cp_0180264960/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fc7bd68cf042ad623421b3f0a0f0ccb47824ae2ac8a039b61347d9bcff634cf +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0190279680/cfg.json b/resnet/zgyp3v0o/cp_0190279680/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..d1b2cd83eb37551b5937e244cbb2a3a5980c4c85 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0190279680/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 37164} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0190279680/model b/resnet/zgyp3v0o/cp_0190279680/model new file mode 100644 index 0000000000000000000000000000000000000000..118dae7c1f069cd93ed92065a9304011b9e0c7b2 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0190279680/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b058d60823e4fa34ae9afd0f3b78ab2a1a5778f994f5f2810e60f8402b7300a1 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0200294400/cfg.json b/resnet/zgyp3v0o/cp_0200294400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..511322416107cdeb955f27d2199a0e63d17d5d52 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0200294400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 39120} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0200294400/model b/resnet/zgyp3v0o/cp_0200294400/model new file mode 100644 index 0000000000000000000000000000000000000000..eb0ec945834c6752d8347ca7e4fdb853d625b2ee --- /dev/null +++ b/resnet/zgyp3v0o/cp_0200294400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:255e2fa8b6cf05b8374fe9c1995e1eff5fe51f0542b820479281828814c27709 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0300441600/cfg.json b/resnet/zgyp3v0o/cp_0300441600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..b6e92461467b7916938aec24bbdc2e5e73154e23 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0300441600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 58680} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0300441600/model b/resnet/zgyp3v0o/cp_0300441600/model new file mode 100644 index 0000000000000000000000000000000000000000..13c60ac7e7550f928aaec1b0fbe87af1f44c45d4 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0300441600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d548a27ac49ac4e97261178fe610adef285d2657e3c0c8d0eeabdec9753cd1d0 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0400588800/cfg.json b/resnet/zgyp3v0o/cp_0400588800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c3ab25548e666f0d7b7736c26f869e0b6cf99956 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0400588800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 78240} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0400588800/model b/resnet/zgyp3v0o/cp_0400588800/model new file mode 100644 index 0000000000000000000000000000000000000000..8d0c4e59149627abe286a4b30ca8e483522ca4b1 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0400588800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7c3c87b902420223f3d3e7fb7621a2c821413bb1decd653ae0eba0f9fff73c +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0500736000/cfg.json b/resnet/zgyp3v0o/cp_0500736000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..c81f653824068bb02e4c10ae3a5056ff86dcf6b7 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0500736000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 97800} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0500736000/model b/resnet/zgyp3v0o/cp_0500736000/model new file mode 100644 index 0000000000000000000000000000000000000000..cbb3eb4a5b76ba4e982d4eb98665a9fc0d268035 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0500736000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e30f5407acf9f7a04e0adc343d4d8baf1a762331d46d421d59dfaccef83f67 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0600883200/cfg.json b/resnet/zgyp3v0o/cp_0600883200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e18089575bbc72569c0b0d3bf9a76beb0c625484 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0600883200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 117360} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0600883200/model b/resnet/zgyp3v0o/cp_0600883200/model new file mode 100644 index 0000000000000000000000000000000000000000..831300ceb5d71a9f8ec1e80a14f0846e3aaecf0b --- /dev/null +++ b/resnet/zgyp3v0o/cp_0600883200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcc2053e8ddc41c59690c1ebd7c607ad4da8e89563845cab97d48a7303b47570 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0701030400/cfg.json b/resnet/zgyp3v0o/cp_0701030400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..db4ab5cac0a3644bb79440e30a624971bafe9e6d --- /dev/null +++ b/resnet/zgyp3v0o/cp_0701030400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 136920} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0701030400/model b/resnet/zgyp3v0o/cp_0701030400/model new file mode 100644 index 0000000000000000000000000000000000000000..d47cb461d5d3baea25aa04df2c5e1191f5c0449c --- /dev/null +++ b/resnet/zgyp3v0o/cp_0701030400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0a86b305dd0282ade80e2686091b97dc6af4e8756e77a482cc6e6d130d1bd59 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0801177600/cfg.json b/resnet/zgyp3v0o/cp_0801177600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a6b071edc1a35b86514a4ac371ef37f1077e1441 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0801177600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 156480} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0801177600/model b/resnet/zgyp3v0o/cp_0801177600/model new file mode 100644 index 0000000000000000000000000000000000000000..b5aca8c922bc4136f580b006b0307dfb40e87602 --- /dev/null +++ b/resnet/zgyp3v0o/cp_0801177600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6ed496670e743ab1bff41c5e3b3510ae756aeaa81cc84245b812a71246e40fd +size 49105345 diff --git a/resnet/zgyp3v0o/cp_0901324800/cfg.json b/resnet/zgyp3v0o/cp_0901324800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..ae498d3c33d44597aa1e559243b83aa867be5a3f --- /dev/null +++ b/resnet/zgyp3v0o/cp_0901324800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 176040} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_0901324800/model b/resnet/zgyp3v0o/cp_0901324800/model new file mode 100644 index 0000000000000000000000000000000000000000..970c57dfdafc87a2e7069b24ea2c6974fc51e3da --- /dev/null +++ b/resnet/zgyp3v0o/cp_0901324800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08f17f08b9e5e63e56b239dd83f66117796f9e5a79d648e247fe8730b285f2ad +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1001472000/cfg.json b/resnet/zgyp3v0o/cp_1001472000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..29c213c39d258c63a6b6a6219f6ac40ba4bf06bd --- /dev/null +++ b/resnet/zgyp3v0o/cp_1001472000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 195600} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1001472000/model b/resnet/zgyp3v0o/cp_1001472000/model new file mode 100644 index 0000000000000000000000000000000000000000..d27e5b0517e23dd4a1657d1f59d244f70b9c4383 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1001472000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98dddeefcc810b8505fed9bc73c66c1e2622649c2084215041ab13ca1dd88c63 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1101619200/cfg.json b/resnet/zgyp3v0o/cp_1101619200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..6cafa041aa0ab83b5c933cd10a5e048afbf5b64a --- /dev/null +++ b/resnet/zgyp3v0o/cp_1101619200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 215160} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1101619200/model b/resnet/zgyp3v0o/cp_1101619200/model new file mode 100644 index 0000000000000000000000000000000000000000..f4808e7fe521e49179176ec39469f54c1a57aa7c --- /dev/null +++ b/resnet/zgyp3v0o/cp_1101619200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde0630934b3afeb8f92685545c61da1133504757332a1ce68b84cb069365e94 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1201766400/cfg.json b/resnet/zgyp3v0o/cp_1201766400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..45dce70a883be51512cb8199e73e7042d69d6996 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1201766400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 234720} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1201766400/model b/resnet/zgyp3v0o/cp_1201766400/model new file mode 100644 index 0000000000000000000000000000000000000000..8dd19461eb0d6f62c24f773a1a715dce00479612 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1201766400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373d3192b0fbc45b31160e59a15fe44aee15f3a79291d9511b8cd176c0e78917 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1301913600/cfg.json b/resnet/zgyp3v0o/cp_1301913600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..0bac74aae8a858d3f2b4641668fcfebfcef47f01 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1301913600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 254280} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1301913600/model b/resnet/zgyp3v0o/cp_1301913600/model new file mode 100644 index 0000000000000000000000000000000000000000..ae03c945057954da9944d2ca6fc5099918f3cf2b --- /dev/null +++ b/resnet/zgyp3v0o/cp_1301913600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed1bd8c121c19a8a1c73f877319b17040a5f1bc2273e6ea9a2f2cc0624b4817b +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1402060800/cfg.json b/resnet/zgyp3v0o/cp_1402060800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e3b5c5531dbf03664a539b4b99eac2baec537a47 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1402060800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 273840} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1402060800/model b/resnet/zgyp3v0o/cp_1402060800/model new file mode 100644 index 0000000000000000000000000000000000000000..8dad5359396e1487143334abca80fc8aaf8d479c --- /dev/null +++ b/resnet/zgyp3v0o/cp_1402060800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ca91a828a7917d6b4ba35c569b5419daf1990d7e2afb3ee39655c98d676332 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1502208000/cfg.json b/resnet/zgyp3v0o/cp_1502208000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..2feef0d847f110e6f101071a5bc85869651d0dbe --- /dev/null +++ b/resnet/zgyp3v0o/cp_1502208000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 293400} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1502208000/model b/resnet/zgyp3v0o/cp_1502208000/model new file mode 100644 index 0000000000000000000000000000000000000000..2e32d57e1e208915e8bb7f351f4f74f79513d1b7 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1502208000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ccd0bbdade826905dd2a2d49546d0fec8390883f60dedcf43be848691c94b2e +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1602355200/cfg.json b/resnet/zgyp3v0o/cp_1602355200/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..9cf18927c4955c50eea03a8de95a30de3563fb18 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1602355200/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 312960} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1602355200/model b/resnet/zgyp3v0o/cp_1602355200/model new file mode 100644 index 0000000000000000000000000000000000000000..1d1c55751546a4378999e74bd12206a11f2fb3fa --- /dev/null +++ b/resnet/zgyp3v0o/cp_1602355200/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:342ced9879b5892ebdc8136c2e985588836676ff2f9494e499075ef81f0be402 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1702502400/cfg.json b/resnet/zgyp3v0o/cp_1702502400/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..e823c7adfa8dab31fd3ec57a3da59ae54e141838 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1702502400/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 332520} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1702502400/model b/resnet/zgyp3v0o/cp_1702502400/model new file mode 100644 index 0000000000000000000000000000000000000000..88a40d3774692164537bd602fe14e56f56103808 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1702502400/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddad531329e2460d2ac7f4936615a26b891e100c91380d835bf46a33fae9aa94 +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1802649600/cfg.json b/resnet/zgyp3v0o/cp_1802649600/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1f9e6ba4cdda36c734521b16dbee5dfde00ed1ea --- /dev/null +++ b/resnet/zgyp3v0o/cp_1802649600/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 352080} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1802649600/model b/resnet/zgyp3v0o/cp_1802649600/model new file mode 100644 index 0000000000000000000000000000000000000000..ad8163268f8e8c7299cae457b12ee9a0874a8743 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1802649600/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d049a6e9d64cffdfce6b942471295a3e835088c3b1e168c2a294d2061cc56dda +size 49105345 diff --git a/resnet/zgyp3v0o/cp_1902796800/cfg.json b/resnet/zgyp3v0o/cp_1902796800/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..40c0a6f5ff0c3ea19a47801a57e319f87c107fd8 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1902796800/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 371640} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_1902796800/model b/resnet/zgyp3v0o/cp_1902796800/model new file mode 100644 index 0000000000000000000000000000000000000000..d4eb418fe2013817bebdb0ef6c0f1a268533ee35 --- /dev/null +++ b/resnet/zgyp3v0o/cp_1902796800/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ac4e627a75b0f71c7ace710af730d3d4f2e6675e44c4358c0764e4b220123d +size 49105345 diff --git a/resnet/zgyp3v0o/cp_2002944000/cfg.json b/resnet/zgyp3v0o/cp_2002944000/cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..a50bdd4c3571e5d517cb3ea53a9e1c6ed361caa3 --- /dev/null +++ b/resnet/zgyp3v0o/cp_2002944000/cfg.json @@ -0,0 +1 @@ +{"cfg": {"train_env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 30, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "train", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 120, "num_envs": 1, "seed": 1934240608}, "eval_envs": {"test_unfiltered": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "test", "difficulty": "unfiltered", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}, "valid_medium": {"env": {"_type_": "cleanba.environments:EnvpoolBoxobanConfig", "env_id": "Sokoban-v0", "reward_finished": 10.0, "reward_box": 1.0, "reward_step": -0.1, "verbose": 0, "min_episode_steps": 240, "load_sequentially": false, "n_levels_to_load": -1, "cache_path": "/opt/sokoban_cache", "split": "valid", "difficulty": "medium", "num_threads": 0, "thread_affinity_offset": -1, "max_num_players": 1, "max_episode_steps": 240, "num_envs": 256, "seed": 5454}, "n_episode_multiple": 2, "steps_to_think": [0, 2, 4, 8, 12, 16, 24, 32], "temperature": 0.0, "safeguard_max_episode_steps": 30000}}, "eval_at_steps": [312960, 3716, 391, 97800, 35208, 21516, 782, 7824, 195600, 1173, 293400, 29340, 1564, 15648, 78240, 391200, 1956, 176040, 2347, 37164, 23472, 273840, 2738, 27384, 9780, 58680, 3129, 371640, 3520, 31296, 156480, 195, 17604, 3912, 254280, 586, 39120, 352080, 978, 25428, 11736, 1369, 136920, 1760, 234720, 33252, 2151, 19560, 332520, 5868, 2542, 117360, 2934, 215160, 13692, 3325], "seed": 813302748, "save_model": true, "log_frequency": 10, "sync_frequency": 100000000000000000000, "actor_update_frequency": 1, "actor_update_cutoff": 100000000000000000000, "base_run_dir": "/training/cleanba", "loss": {"gamma": 0.97, "ent_coef": 0.01, "vf_coef": 0.25, "vtrace_lambda": 0.5, "clip_rho_threshold": 1.0, "clip_pg_rho_threshold": 1.0, "normalize_advantage": false, "logit_l2_coef": 1.5625e-06, "weight_l2_coef": 1.5625e-08, "max_vf_error": 1.0, "vf_loss_type": "square", "advantage_multiplier": "one"}, "net": {"_type_": "cleanba.network:GuezResNetConfig", "channels": [32, 32, 64, 64, 64, 64, 64, 64, 64], "strides": [1, 1, 1, 1, 1, 1, 1, 1, 1], "kernel_sizes": [4, 4, 4, 4, 4, 4, 4, 4, 4], "mlp_hiddens": [256], "yang_init": false, "norm": {"_type_": "cleanba.network:IdentityNorm"}, "normalize_input": false, "head_scale": 1.0}, "total_timesteps": 2002944000, "learning_rate": 0.0004, "final_learning_rate": 4e-06, "local_num_envs": 256, "num_steps": 20, "train_epochs": 1, "anneal_lr": true, "num_minibatches": 8, "gradient_accumulation_steps": 1, "max_grad_norm": 0.00025, "optimizer": "adam", "adam_b1": 0.9, "rmsprop_eps": 1.5625e-07, "rmsprop_decay": 0.99, "optimizer_yang": false, "base_fan_in": 1, "queue_timeout": 300.0, "num_actor_threads": 1, "actor_device_ids": [0], "learner_device_ids": [0], "distributed": false, "concurrency": true, "load_path": null}, "update_step": 391200} \ No newline at end of file diff --git a/resnet/zgyp3v0o/cp_2002944000/model b/resnet/zgyp3v0o/cp_2002944000/model new file mode 100644 index 0000000000000000000000000000000000000000..14b1535a32bdac9e9e63a3fa5c5da43d0e0eaf80 --- /dev/null +++ b/resnet/zgyp3v0o/cp_2002944000/model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ad7741a4676164862ecac1a2bfff45334424b0d5341bedb4170ffba6184fb37 +size 49105345