From 4454c074ae4912b8ce71f432cc081a95a7562a86 Mon Sep 17 00:00:00 2001 From: Lasmar Khalifa Date: Wed, 27 May 2026 16:57:59 -0400 Subject: [PATCH] Use block events in claude --- .../providers/claude/claude_invocation.rb | 4 +- .../functional/roast_examples_test.rb | 40 ++++++++--- .../claude/claude_invocation_test.rb | 71 +++++++++---------- test/test_helper.rb | 3 + 4 files changed, 72 insertions(+), 46 deletions(-) diff --git a/lib/roast/cogs/agent/providers/claude/claude_invocation.rb b/lib/roast/cogs/agent/providers/claude/claude_invocation.rb index 1c6ad3d7..5139525b 100644 --- a/lib/roast/cogs/agent/providers/claude/claude_invocation.rb +++ b/lib/roast/cogs/agent/providers/claude/claude_invocation.rb @@ -77,7 +77,7 @@ def run! raise ClaudeAlreadyStartedError if started? @started = true - puts "[USER PROMPT] #{@prompt}" if @show_prompt + Event << { block: { header: "USER PROMPT", content: @prompt } } if @show_prompt _stdout, stderr, status = CommandRunner.execute( command_line, working_directory: @working_directory, @@ -87,7 +87,7 @@ def run! if status.success? @completed = true - puts "[AGENT RESPONSE] #{@result.response}" if @show_response + Event << { block: { header: "AGENT RESPONSE", content: @result.response } } if @show_response else @failed = true @result.success = false diff --git a/test/examples/functional/roast_examples_test.rb b/test/examples/functional/roast_examples_test.rb index ec06aac8..225e6efe 100644 --- a/test/examples/functional/roast_examples_test.rb +++ b/test/examples/functional/roast_examples_test.rb @@ -72,30 +72,48 @@ class RoastExamplesTest < FunctionalTest logged_stdout, logged_stderr = original_streams_from_logger_output expected_stdout = <<~STDOUT - [USER PROMPT] What is 2+2? + [USER PROMPT] + ──────────────────────────────────────── + What is 2+2? + ──────────────────────────────────────── The user is asking a simple math question: "What is 2+2?" This is a straightforward arithmetic question. The answer is 4. This doesn't require any tool usage - it's just a basic math question. I should answer directly and concisely. 2 + 2 = 4 - [AGENT RESPONSE] 2 + 2 = 4 - [USER PROMPT] Now multiply that by 3 + [AGENT RESPONSE] + ──────────────────────────────────────── + 2 + 2 = 4 + ──────────────────────────────────────── + [USER PROMPT] + ──────────────────────────────────────── + Now multiply that by 3 + ──────────────────────────────────────── The user is asking me to multiply the previous answer (4) by 3. 4 × 3 = 12 This is another straightforward arithmetic question. No tools needed. 4 × 3 = 12 - [AGENT RESPONSE] 4 × 3 = 12 - [USER PROMPT] Now subtract 5 + [AGENT RESPONSE] + ──────────────────────────────────────── + 4 × 3 = 12 + ──────────────────────────────────────── + [USER PROMPT] + ──────────────────────────────────────── + Now subtract 5 + ──────────────────────────────────────── The user is asking me to subtract 5 from the previous answer (12). 12 - 5 = 7 This is another straightforward arithmetic question. No tools needed. 12 - 5 = 7 - [AGENT RESPONSE] 12 - 5 = 7 + [AGENT RESPONSE] + ──────────────────────────────────────── + 12 - 5 = 7 + ──────────────────────────────────────── [AGENT STATS] Turns: 3 Duration: 6 seconds Cost (USD): $0.0747 @@ -583,7 +601,10 @@ class RoastExamplesTest < FunctionalTest logged_stdout, logged_stderr = original_streams_from_logger_output expected_stdout = <<~STDOUT - [USER PROMPT] What is the world's largest lake? + [USER PROMPT] + ──────────────────────────────────────── + What is the world's largest lake? + ──────────────────────────────────────── The user is asking me a simple geography question about the world's largest lake. This is a straightforward factual question that doesn't require any tools or special context. The world's largest lake by surface area is the Caspian Sea, which covers about 143,550 square miles (371,000 square kilometers). It's technically called a "sea" but is actually a lake because it's not connected to the ocean. @@ -610,9 +631,12 @@ class RoastExamplesTest < FunctionalTest Caspian spreads wide— Ancient waters vast and deep, World's largest lake gleams. - [AGENT RESPONSE] Caspian spreads wide— + [AGENT RESPONSE] + ──────────────────────────────────────── + Caspian spreads wide— Ancient waters vast and deep, World's largest lake gleams. + ──────────────────────────────────────── [AGENT STATS] Turns: 1 Duration: 4 seconds Cost (USD): $0.050913 diff --git a/test/roast/cogs/agent/providers/claude/claude_invocation_test.rb b/test/roast/cogs/agent/providers/claude/claude_invocation_test.rb index bbb2ef6c..f9d8d341 100644 --- a/test/roast/cogs/agent/providers/claude/claude_invocation_test.rb +++ b/test/roast/cogs/agent/providers/claude/claude_invocation_test.rb @@ -388,71 +388,70 @@ def failure_status @invocation.send(:handle_message, second) end - test "run! prints prompt when show_prompt is enabled" do + test "run! emits USER PROMPT block event when show_prompt is enabled" do @config.show_prompt! invocation = ClaudeInvocation.new(@config, "Hello agent", nil) - output = capture_io do - CommandRunner.stub(:execute, ["", "", success_status]) do - invocation.run! - end + Event.expects(:<<).with do |payload| + payload[:block] && + payload[:block][:header] == "USER PROMPT" && + payload[:block][:content] == "Hello agent" end - assert_match "[USER PROMPT] Hello agent", output.first + CommandRunner.stub(:execute, ["", "", success_status]) do + invocation.run! + end end - test "run! does not print prompt when show_prompt is disabled" do + test "run! does not emit USER PROMPT block event when show_prompt is disabled" do invocation = ClaudeInvocation.new(@config, "Hello agent", nil) - output = capture_io do - CommandRunner.stub(:execute, ["", "", success_status]) do - invocation.run! - end - end + Event.expects(:<<).never - refute_match(/\[USER PROMPT\]/, output.first) + CommandRunner.stub(:execute, ["", "", success_status]) do + invocation.run! + end end - test "run! prints response when show_response is enabled" do + test "run! emits AGENT RESPONSE block event when show_response is enabled" do @config.show_response! invocation = ClaudeInvocation.new(@config, "Hello agent", nil) result_json = { type: "result", subtype: "success", result: "Here is my answer" }.to_json - output = capture_io do - CommandRunner.stub(:execute, ->(*_args, **kwargs) { - kwargs[:stdout_handler]&.call(result_json) - ["", "", success_status] - }) do - invocation.run! - end + Event.expects(:<<).with do |payload| + payload[:block] && + payload[:block][:header] == "AGENT RESPONSE" && + payload[:block][:content] == "Here is my answer" end - assert_match "[AGENT RESPONSE] Here is my answer", output.first + CommandRunner.stub(:execute, ->(*_args, **kwargs) { + kwargs[:stdout_handler]&.call(result_json) + ["", "", success_status] + }) do + invocation.run! + end end - test "run! does not print response when show_response is disabled" do + test "run! does not emit AGENT RESPONSE block event when show_response is disabled" do @config.no_show_response! invocation = ClaudeInvocation.new(@config, "Hello agent", nil) - output = capture_io do - CommandRunner.stub(:execute, ["", "", success_status]) do - invocation.run! - end - end + Event.expects(:<<).never - refute_match(/\[AGENT RESPONSE\]/, output.first) + CommandRunner.stub(:execute, ["", "", success_status]) do + invocation.run! + end end - test "run! does not print response on failure" do + test "run! does not emit AGENT RESPONSE block event on failure even when show_response is enabled" do + @config.show_response! invocation = ClaudeInvocation.new(@config, "Hello agent", nil) - output = capture_io do - CommandRunner.stub(:execute, ["", "Error", failure_status]) do - invocation.run! - end - end + Event.expects(:<<).never - refute_match(/\[AGENT RESPONSE\]/, output.first) + CommandRunner.stub(:execute, ["", "Error", failure_status]) do + invocation.run! + end end end end diff --git a/test/test_helper.rb b/test/test_helper.rb index c2d7bdbd..ebea78d3 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -148,6 +148,9 @@ def original_streams_from_logger_output(logger_output: @logger_output.string) elsif line.include?(" ❯") || line.include?(" ❙") current_stream = :stdout stdout_lines << line.sub(/^.*(❯|❙) ?/, "") + elsif (header_match = line.match(/(\[[^\]]+\])\s*↓/)) + current_stream = :stdout + stdout_lines << "#{header_match[1]}\n" else current_stream = nil end