Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/roast/cogs/agent/providers/claude/claude_invocation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def run!
raise ClaudeAlreadyStartedError if started?

@started = true
puts "[USER PROMPT] #{@prompt}" if @show_prompt
Event << { block: { header: "USER PROMPT", content: @prompt } } if @show_prompt
_stdout, stderr, status = CommandRunner.execute(
command_line,
working_directory: @working_directory,
Expand All @@ -87,7 +87,7 @@ def run!

if status.success?
@completed = true
puts "[AGENT RESPONSE] #{@result.response}" if @show_response
Event << { block: { header: "AGENT RESPONSE", content: @result.response } } if @show_response
else
@failed = true
@result.success = false
Expand Down
40 changes: 32 additions & 8 deletions test/examples/functional/roast_examples_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,30 +72,48 @@ class RoastExamplesTest < FunctionalTest

logged_stdout, logged_stderr = original_streams_from_logger_output
expected_stdout = <<~STDOUT
[USER PROMPT] What is 2+2?
[USER PROMPT]
────────────────────────────────────────
What is 2+2?
────────────────────────────────────────
The user is asking a simple math question: "What is 2+2?"

This is a straightforward arithmetic question. The answer is 4.

This doesn't require any tool usage - it's just a basic math question. I should answer directly and concisely.
2 + 2 = 4
[AGENT RESPONSE] 2 + 2 = 4
[USER PROMPT] Now multiply that by 3
[AGENT RESPONSE]
────────────────────────────────────────
2 + 2 = 4
────────────────────────────────────────
[USER PROMPT]
────────────────────────────────────────
Now multiply that by 3
────────────────────────────────────────
The user is asking me to multiply the previous answer (4) by 3.

4 × 3 = 12

This is another straightforward arithmetic question. No tools needed.
4 × 3 = 12
[AGENT RESPONSE] 4 × 3 = 12
[USER PROMPT] Now subtract 5
[AGENT RESPONSE]
────────────────────────────────────────
4 × 3 = 12
────────────────────────────────────────
[USER PROMPT]
────────────────────────────────────────
Now subtract 5
────────────────────────────────────────
The user is asking me to subtract 5 from the previous answer (12).

12 - 5 = 7

This is another straightforward arithmetic question. No tools needed.
12 - 5 = 7
[AGENT RESPONSE] 12 - 5 = 7
[AGENT RESPONSE]
────────────────────────────────────────
12 - 5 = 7
────────────────────────────────────────
[AGENT STATS] Turns: 3
Duration: 6 seconds
Cost (USD): $0.0747
Expand Down Expand Up @@ -583,7 +601,10 @@ class RoastExamplesTest < FunctionalTest

logged_stdout, logged_stderr = original_streams_from_logger_output
expected_stdout = <<~STDOUT
[USER PROMPT] What is the world's largest lake?
[USER PROMPT]
────────────────────────────────────────
What is the world's largest lake?
────────────────────────────────────────
The user is asking me a simple geography question about the world's largest lake. This is a straightforward factual question that doesn't require any tools or special context.

The world's largest lake by surface area is the Caspian Sea, which covers about 143,550 square miles (371,000 square kilometers). It's technically called a "sea" but is actually a lake because it's not connected to the ocean.
Expand All @@ -610,9 +631,12 @@ class RoastExamplesTest < FunctionalTest
Caspian spreads wide—
Ancient waters vast and deep,
World's largest lake gleams.
[AGENT RESPONSE] Caspian spreads wide—
[AGENT RESPONSE]
────────────────────────────────────────
Caspian spreads wide—
Ancient waters vast and deep,
World's largest lake gleams.
────────────────────────────────────────
[AGENT STATS] Turns: 1
Duration: 4 seconds
Cost (USD): $0.050913
Expand Down
71 changes: 35 additions & 36 deletions test/roast/cogs/agent/providers/claude/claude_invocation_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -388,71 +388,70 @@ def failure_status
@invocation.send(:handle_message, second)
end

test "run! prints prompt when show_prompt is enabled" do
test "run! emits USER PROMPT block event when show_prompt is enabled" do
@config.show_prompt!
invocation = ClaudeInvocation.new(@config, "Hello agent", nil)

output = capture_io do
CommandRunner.stub(:execute, ["", "", success_status]) do
invocation.run!
end
Event.expects(:<<).with do |payload|
payload[:block] &&
payload[:block][:header] == "USER PROMPT" &&
payload[:block][:content] == "Hello agent"
Comment thread
LasmarKhalifa marked this conversation as resolved.
end

assert_match "[USER PROMPT] Hello agent", output.first
CommandRunner.stub(:execute, ["", "", success_status]) do
invocation.run!
end
end

test "run! does not print prompt when show_prompt is disabled" do
test "run! does not emit USER PROMPT block event when show_prompt is disabled" do
invocation = ClaudeInvocation.new(@config, "Hello agent", nil)

output = capture_io do
CommandRunner.stub(:execute, ["", "", success_status]) do
invocation.run!
end
end
Event.expects(:<<).never

refute_match(/\[USER PROMPT\]/, output.first)
CommandRunner.stub(:execute, ["", "", success_status]) do
invocation.run!
end
end

test "run! prints response when show_response is enabled" do
test "run! emits AGENT RESPONSE block event when show_response is enabled" do
@config.show_response!
invocation = ClaudeInvocation.new(@config, "Hello agent", nil)

result_json = { type: "result", subtype: "success", result: "Here is my answer" }.to_json
output = capture_io do
CommandRunner.stub(:execute, ->(*_args, **kwargs) {
kwargs[:stdout_handler]&.call(result_json)
["", "", success_status]
}) do
invocation.run!
end
Event.expects(:<<).with do |payload|
payload[:block] &&
payload[:block][:header] == "AGENT RESPONSE" &&
payload[:block][:content] == "Here is my answer"
end

assert_match "[AGENT RESPONSE] Here is my answer", output.first
CommandRunner.stub(:execute, ->(*_args, **kwargs) {
kwargs[:stdout_handler]&.call(result_json)
["", "", success_status]
}) do
invocation.run!
end
end

test "run! does not print response when show_response is disabled" do
test "run! does not emit AGENT RESPONSE block event when show_response is disabled" do
@config.no_show_response!
invocation = ClaudeInvocation.new(@config, "Hello agent", nil)

output = capture_io do
CommandRunner.stub(:execute, ["", "", success_status]) do
invocation.run!
end
end
Event.expects(:<<).never

refute_match(/\[AGENT RESPONSE\]/, output.first)
CommandRunner.stub(:execute, ["", "", success_status]) do
invocation.run!
end
end

test "run! does not print response on failure" do
test "run! does not emit AGENT RESPONSE block event on failure even when show_response is enabled" do
@config.show_response!
invocation = ClaudeInvocation.new(@config, "Hello agent", nil)

output = capture_io do
CommandRunner.stub(:execute, ["", "Error", failure_status]) do
invocation.run!
end
end
Event.expects(:<<).never

refute_match(/\[AGENT RESPONSE\]/, output.first)
CommandRunner.stub(:execute, ["", "Error", failure_status]) do
invocation.run!
end
end
end
end
Expand Down
3 changes: 3 additions & 0 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ def original_streams_from_logger_output(logger_output: @logger_output.string)
elsif line.include?(" ❯") || line.include?(" ❙")
current_stream = :stdout
stdout_lines << line.sub(/^.*(❯|❙) ?/, "")
elsif (header_match = line.match(/(\[[^\]]+\])\s*↓/))
current_stream = :stdout
stdout_lines << "#{header_match[1]}\n"
else
current_stream = nil
end
Expand Down
Loading