|
21 | 21 |
|
22 | 22 | logging.getLogger().setLevel(logging.DEBUG)
|
23 | 23 |
|
24 |
| -#from agentlab.agents.webmall_generic_agent import AGENT_4o_VISION |
25 |
| -#from agentlab.agents.generic_agent import AGENT_4o_VISION |
26 |
| - |
27 | 24 | from agentlab.agents import dynamic_prompting as dp
|
28 | 25 |
|
29 |
| -#from agentlab.llm.eco_logits_llm_configs import CHAT_MODEL_ARGS_DICT |
30 | 26 | from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT
|
31 | 27 |
|
32 |
| -#from agentlab.agents.webmall_generic_agent.generic_agent import GenericAgent, GenericPromptFlags, GenericAgentArgs |
33 | 28 | from agentlab.agents.generic_agent.generic_agent import GenericAgent, GenericPromptFlags, GenericAgentArgs
|
34 | 29 |
|
35 | 30 | FLAGS_default = GenericPromptFlags(
|
|
87 | 82 | FLAGS_AX_M.use_memory = True
|
88 | 83 | FLAGS_AX_M.extra_instructions = 'Use your memory to note down important information like the URLs of potential solutions and corresponding pricing information.'
|
89 | 84 |
|
90 |
| -FLAGS_AX_V_M = FLAGS_default.copy() |
91 |
| -FLAGS_AX_V_M.obs.use_screenshot = True |
92 |
| -FLAGS_AX_V_M.obs.use_som = True |
93 |
| -FLAGS_AX_V_M.use_memory = True |
94 |
| -FLAGS_AX_V_M.extra_instructions = 'Use your memory to note down important information like the URLs of potential solutions and corresponding pricing information.' |
95 |
| - |
96 | 85 | AGENT_41_AX = GenericAgentArgs(
|
| 86 | + chat_model_args=CHAT_MODEL_ARGS_DICT["openai/gpt-4.1-2025-04-14"], |
| 87 | + flags=FLAGS_AX, |
| 88 | +) |
| 89 | + |
| 90 | +AGENT_CLAUDE_AX = GenericAgentArgs( |
97 | 91 | chat_model_args=CHAT_MODEL_ARGS_DICT["anthropic/claude-sonnet-4-20250514"],
|
98 | 92 | flags=FLAGS_AX,
|
99 | 93 | )
|
|
103 | 97 | flags=FLAGS_V,
|
104 | 98 | )
|
105 | 99 |
|
| 100 | +AGENT_CLAUDE_V = GenericAgentArgs( |
| 101 | + chat_model_args=CHAT_MODEL_ARGS_DICT["anthropic/claude-sonnet-4-20250514"], |
| 102 | + flags=FLAGS_V, |
| 103 | +) |
106 | 104 |
|
107 | 105 | AGENT_41_AX_V = GenericAgentArgs(
|
| 106 | + chat_model_args=CHAT_MODEL_ARGS_DICT["openai/gpt-4.1-2025-04-14"], |
| 107 | + flags=FLAGS_AX_V, |
| 108 | +) |
| 109 | + |
| 110 | +AGENT_CLAUDE_AX_V = GenericAgentArgs( |
108 | 111 | chat_model_args=CHAT_MODEL_ARGS_DICT["anthropic/claude-sonnet-4-20250514"],
|
109 | 112 | flags=FLAGS_AX_V,
|
110 | 113 | )
|
111 | 114 |
|
112 | 115 | AGENT_41_AX_M = GenericAgentArgs(
|
113 |
| - chat_model_args=CHAT_MODEL_ARGS_DICT["anthropic/claude-sonnet-4-20250514"], |
| 116 | + chat_model_args=CHAT_MODEL_ARGS_DICT["openai/gpt-4.1-2025-04-14"], |
114 | 117 | flags=FLAGS_AX_M,
|
115 | 118 | )
|
116 | 119 |
|
117 |
| -AGENT_41_AX_V_M = GenericAgentArgs( |
118 |
| - chat_model_args=CHAT_MODEL_ARGS_DICT["openai/gpt-4.1-2025-04-14"], |
119 |
| - flags=FLAGS_AX_V_M, |
| 120 | +AGENT_CLAUDE_AX_M = GenericAgentArgs( |
| 121 | + chat_model_args=CHAT_MODEL_ARGS_DICT["anthropic/claude-sonnet-4-20250514"], |
| 122 | + flags=FLAGS_AX_M, |
120 | 123 | )
|
121 | 124 |
|
122 | 125 | current_file = Path(__file__).resolve()
|
|
125 | 128 |
|
126 | 129 |
|
127 | 130 | # choose your agent or provide a new agent
|
128 |
| -agent_args = [AGENT_41_AX_M] |
| 131 | +agent_args = [AGENT_41_AX] |
129 | 132 |
|
130 | 133 | # ## select the benchmark to run on
|
131 |
| -# benchmark = "webmall_a_c_d" |
132 |
| -# benchmark = "webmall_tiny" |
133 |
| -# benchmark = "webmall" |
134 |
| -# benchmark = "miniwob" |
135 |
| -# benchmark = "workarena_l1" |
136 |
| -# benchmark = "workarena_l2" |
137 |
| -# benchmark = "workarena_l3" |
138 |
| -# benchmark = "webarena" |
139 |
| -#benchmark = "webmall_basic_v0.7" |
140 |
| -benchmark = "webmall_advanced_v0.7" |
141 |
| -#benchmark = "webmall_tiny_v0.7" |
142 |
| -# benchmark = "webmall_j_v0.7" |
| 134 | + |
| 135 | +benchmark = "webmall_basic_v0.7" |
| 136 | +# benchmark = "webmall_advanced_v0.7" |
143 | 137 |
|
144 | 138 | # Set reproducibility_mode = True for reproducibility
|
145 | 139 | # this will "ask" agents to be deterministic. Also, it will prevent you from launching if you have
|
|
0 commit comments