diff --git a/lib/instana/backend/host_agent_activation_observer.rb b/lib/instana/backend/host_agent_activation_observer.rb index 56b2157e..84c7f360 100644 --- a/lib/instana/backend/host_agent_activation_observer.rb +++ b/lib/instana/backend/host_agent_activation_observer.rb @@ -25,9 +25,9 @@ def initialize(client, discovery, wait_time: 30, logger: ::Instana.logger, max_w def update(_time, _old_version, new_version) return unless new_version.nil? - socket = @socket_proc.call(@client) - + socket = nil try_forever_with_backoff do + socket = @socket_proc.call(@client) payload = discovery_payload(socket) discovery_response = @client.send_request('PUT', DISCOVERY_URL, payload) @@ -45,7 +45,7 @@ def update(_time, _old_version, new_version) ::Instana.config.read_config_from_agent(discovery) end - socket.close + socket&.close end private diff --git a/lib/instana/backend/host_agent_reporting_observer.rb b/lib/instana/backend/host_agent_reporting_observer.rb index a4704fe2..fd95988f 100644 --- a/lib/instana/backend/host_agent_reporting_observer.rb +++ b/lib/instana/backend/host_agent_reporting_observer.rb @@ -64,8 +64,8 @@ def report_traces response = @client.send_request('POST', path, spans) unless response.ok? - @logger.debug("Sent `#{spans.count}` spans to `#{path}` and got `#{response.code}`.") - @discovery.swap { nil } + @logger.warn("Failed to send `#{spans.count}` spans to `#{path}`. Response: #{response.code} - #{response.body}") + trigger_rediscovery break end end @@ -82,8 +82,8 @@ def report_metrics if response.ok? handle_agent_tasks(response, discovery) unless response.body.empty? else - @logger.debug("Sent `#{payload}` to `#{path}` and got `#{response.code}`.") - @discovery.swap { nil } + @logger.warn("Failed to send metrics to `#{path}`. Response: #{response.code} - #{response.body}") + trigger_rediscovery end end @@ -125,6 +125,11 @@ def metrics_payload(discovery) payload end + + def trigger_rediscovery + @discovery.swap { nil } + ::Instana.agent.announce + end end end end diff --git a/lib/instana/backend/request_client.rb b/lib/instana/backend/request_client.rb index 6955b437..8eaa7a64 100644 --- a/lib/instana/backend/request_client.rb +++ b/lib/instana/backend/request_client.rb @@ -59,9 +59,25 @@ def send_request(method, path, data = nil, headers = {}) data end - - response = @client.send_request(method, path, body, headers) - Response.new(response) + begin + response = @client.send_request(method, path, body, headers) + Response.new(response) + rescue Errno::ECONNREFUSED => e + Instana.logger.debug("Connection refused to #{@host}:#{@port} - #{e.message}") + create_error_response('503', 'Connection Refused', 'Connection refused', e.message) + rescue Errno::EHOSTUNREACH => e + Instana.logger.debug("Host unreachable #{@host}:#{@port} - #{e.message}") + create_error_response('503', 'Host Unreachable', 'Host unreachable', e.message) + rescue Errno::ETIMEDOUT, Net::OpenTimeout, Net::ReadTimeout => e + Instana.logger.debug("Timeout connecting to #{@host}:#{@port} - #{e.message}") + create_error_response('408', 'Request Timeout', 'Timeout', e.message) + rescue SocketError => e + Instana.logger.debug("Socket error connecting to #{@host}:#{@port} - #{e.message}") + create_error_response('502', 'Socket Error', 'Socket error', e.message) + rescue StandardError => e + Instana.logger.debug("Error sending request to #{@host}:#{@port} - #{e.class}: #{e.message}") + create_error_response('500', 'Internal Error', e.class.to_s, e.message) + end end private @@ -71,6 +87,19 @@ def encode_body(data) INSTANA_USE_OJ ? Oj.dump(data, mode: :strict) : JSON.dump(data) # :nocov: end + + def create_error_response(code, message, error_type, error_message) + # Create a mock response object that behaves like Net::HTTPResponse + error_response = Object.new + error_body = JSON.dump(error: error_type, message: error_message) + + error_response.define_singleton_method(:code) { code } + error_response.define_singleton_method(:message) { message } + error_response.define_singleton_method(:body) { error_body } + error_response.define_singleton_method(:is_a?) { |klass| klass == Net::HTTPResponse || super(klass) } + + Response.new(error_response) + end end end end diff --git a/lib/instana/trace/span.rb b/lib/instana/trace/span.rb index c856f250..c681abf7 100644 --- a/lib/instana/trace/span.rb +++ b/lib/instana/trace/span.rb @@ -87,13 +87,13 @@ def add_stack(span_stack_config: nil, stack: Kernel.caller) @attributes[:stack] = stack .map do |call| - file, line, *method = call.split(':') + file, line, *method = call.split(':') - { - c: file, - n: line, - m: method.join(' ') - } + { + c: file, + n: line, + m: method.join(' ') + } end.take([limit, 40].min) end diff --git a/test/backend/host_agent_lookup_test.rb b/test/backend/host_agent_lookup_test.rb index c8c836cd..71fa9bb9 100644 --- a/test/backend/host_agent_lookup_test.rb +++ b/test/backend/host_agent_lookup_test.rb @@ -75,4 +75,19 @@ def test_lookup_with_gateway_no_destination assert_nil client end + + def test_lookup_handles_connection_errors + # Test that various connection errors result in nil client + stub_request(:get, "http://10.10.10.10:42699/") + .to_raise(Errno::ECONNREFUSED) + + subject = Instana::Backend::HostAgentLookup.new('10.10.10.10', 42699) + + client = FakeFS.with_fresh do + FakeFS::FileSystem.clone('test/support/ecs', '/proc') + subject.call + end + + assert_nil client + end end diff --git a/test/backend/host_agent_reporting_observer_test.rb b/test/backend/host_agent_reporting_observer_test.rb index 5f88fddb..c0d50b5d 100644 --- a/test/backend/host_agent_reporting_observer_test.rb +++ b/test/backend/host_agent_reporting_observer_test.rb @@ -41,6 +41,12 @@ def test_report_fail .to_return(status: 200) stub_request(:post, "http://10.10.10.10:9292/com.instana.plugin.ruby.0") .to_return(status: 500) + stub_request(:get, "http://127.0.0.1:42699/") + .to_return(status: 200) + stub_request(:put, "http://127.0.0.1:42699/com.instana.plugin.ruby.discovery") + .to_return(status: 200, body: '{"pid": 0}') + stub_request(:head, "http://127.0.0.1:42699/com.instana.plugin.ruby.0") + .to_return(status: 200) client = Instana::Backend::RequestClient.new('10.10.10.10', 9292) discovery = Concurrent::Atom.new({'pid' => 0}) @@ -240,6 +246,13 @@ def test_report_traces_error stub_request(:post, "http://10.10.10.10:9292/com.instana.plugin.ruby/traces.1234") .to_return(status: 500) + stub_request(:get, "http://127.0.0.1:42699/") + .to_return(status: 200) + stub_request(:put, "http://127.0.0.1:42699/com.instana.plugin.ruby.discovery") + .to_return(status: 200, body: '{"pid": 1234}') + stub_request(:head, "http://127.0.0.1:42699/com.instana.plugin.ruby.1234") + .to_return(status: 200) + client = Instana::Backend::RequestClient.new('10.10.10.10', 9292) discovery = Concurrent::Atom.new({'pid' => 1234}) diff --git a/test/backend/host_agent_test.rb b/test/backend/host_agent_test.rb index cf2efe00..e98d4e42 100644 --- a/test/backend/host_agent_test.rb +++ b/test/backend/host_agent_test.rb @@ -86,4 +86,55 @@ def test_after_fork subject = Instana::Backend::HostAgent.new assert subject.respond_to? :after_fork end + + def test_announce_retries_on_connection_failure + agent_host = '10.10.10.10' + ::Instana.config[:agent_host] = agent_host + + # Simulate connection failures followed by success + stub_request(:get, "http://#{agent_host}:42699/") + .to_raise(Errno::ECONNREFUSED).times(3).then + .to_return(status: 200, body: "", headers: {}) + + discovery = Minitest::Mock.new + discovery.expect(:delete_observers, discovery, []) + discovery.expect(:observers, discovery, []) + discovery.expect(:notify_and_delete_observers, discovery, [Object, nil, nil]) + discovery.expect(:with_observer, discovery, [Instana::Backend::HostAgentActivationObserver]) + discovery.expect(:with_observer, discovery, [Instana::Backend::HostAgentReportingObserver]) + discovery.expect(:swap, discovery, []) + + subject = Instana::Backend::HostAgent.new(discovery: discovery) + + FakeFS.with_fresh do + FakeFS::FileSystem.clone('test/support/ecs', '/proc') + client = subject.announce + assert client + assert_instance_of Instana::Backend::RequestClient, client + end + + discovery.verify + ensure + ::Instana.config[:agent_host] = '127.0.0.1' + end + + def test_announce_returns_nil_after_max_retries + agent_host = '10.10.10.10' + ::Instana.config[:agent_host] = agent_host + + # Simulate persistent connection failures + stub_request(:get, "http://#{agent_host}:42699/") + .to_raise(Errno::ECONNREFUSED).times(15) + + discovery = Concurrent::Atom.new(nil) + subject = Instana::Backend::HostAgent.new(discovery: discovery) + + FakeFS.with_fresh do + FakeFS::FileSystem.clone('test/support/ecs', '/proc') + client = subject.announce + assert_nil client + end + ensure + ::Instana.config[:agent_host] = '127.0.0.1' + end end diff --git a/test/backend/request_client_test.rb b/test/backend/request_client_test.rb index 9a58c698..343b0efd 100644 --- a/test/backend/request_client_test.rb +++ b/test/backend/request_client_test.rb @@ -36,4 +36,48 @@ def test_send_request_failure refute response.ok? end + + def test_connection_errors_return_error_responses + subject = Instana::Backend::RequestClient.new('example.com', 9292) + + # Test ECONNREFUSED + stub_request(:get, 'http://example.com:9292/refused') + .to_raise(Errno::ECONNREFUSED) + response = subject.send_request('GET', '/refused') + refute response.ok? + assert_equal '503', response.code + assert_includes response.body, 'Connection refused' + + # Test EHOSTUNREACH + stub_request(:get, 'http://example.com:9292/unreachable') + .to_raise(Errno::EHOSTUNREACH) + response = subject.send_request('GET', '/unreachable') + refute response.ok? + assert_equal '503', response.code + assert_includes response.body, 'Host unreachable' + + # Test Timeout + stub_request(:get, 'http://example.com:9292/timeout') + .to_timeout + response = subject.send_request('GET', '/timeout') + refute response.ok? + assert_equal '408', response.code + assert_includes response.body, 'Timeout' + + # Test SocketError + stub_request(:get, 'http://example.com:9292/socket') + .to_raise(SocketError.new('Name or service not known')) + response = subject.send_request('GET', '/socket') + refute response.ok? + assert_equal '502', response.code + assert_includes response.body, 'Socket error' + + # Test StandardError + stub_request(:get, 'http://example.com:9292/error') + .to_raise(StandardError.new('Unexpected error')) + response = subject.send_request('GET', '/error') + refute response.ok? + assert_equal '500', response.code + assert_includes response.body, 'StandardError' + end end