Skip to content

Commit aeb9c16

Browse files
author
Rob Holland
authored
Add more details to logs to improve debugging (coinbase#43)
* Add more details to logs to improve debugging. * Allow configuring the drain grace period. Defaults to 25 seconds to fit inside the default grace period for kubernetes pod shutdown which is 30 seconds. * Revert "Allow configuring the drain grace period." This reverts commit b051b53. * Log structured data. * This isn't a log line :) * Fix double-logging on workflow task failure. Convert some missed logging calls to metadata versions. * Fix up a few more logging callsites. * PR feedback. * Remove hearbeat details from logging. The user can log from their own code if needed for debugging. * Add log spec. Ensure JSON is in strict mode. * Typo. * Add log method spec. * Match Logger interface.
1 parent d7e70bc commit aeb9c16

30 files changed

+123
-58
lines changed

examples/activities/process_file_activity.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ class ProcessFileActivity < Temporal::Activity
1010
def execute(input)
1111
file_contents = File.read(input)
1212

13-
logger.info("Processing file: #{input}")
13+
logger.info("Processing file", { input: input })
1414
logger.info("File contents: #{file_contents}")
1515

1616
raise 'unknown file'

examples/activities/trip/book_flight_activity.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module Trip
22
class BookFlightActivity < Temporal::Activity
33
def execute(trip_id)
4-
logger.info "Booking flight for trip #{trip_id}"
4+
logger.info "Booking flight", { trip_id: trip_id }
55

66
return { reservation_id: SecureRandom.uuid, total: rand(0..1000) / 10.0 }
77
end

examples/activities/trip/book_hotel_activity.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module Trip
22
class BookHotelActivity < Temporal::Activity
33
def execute(trip_id)
4-
logger.info "Booking hotel room for trip #{trip_id}"
4+
logger.info "Booking hotel room", { trip_id: trip_id }
55

66
return { reservation_id: SecureRandom.uuid, total: rand(0..1000) / 10.0 }
77
end

examples/activities/trip/cancel_car_activity.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module Trip
22
class CancelCarActivity < Temporal::Activity
33
def execute(reservation_id)
4-
logger.info "Cancelling car reservation: #{reservation_id}"
4+
logger.info "Cancelling car reservation" { reservation_id: reservation_id }
55

66
return
77
end

examples/activities/trip/cancel_flight_activity.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module Trip
22
class CancelFlightActivity < Temporal::Activity
33
def execute(reservation_id)
4-
logger.info "Cancelling flight reservation: #{reservation_id}"
4+
logger.info "Cancelling flight reservation", { reservation_id: reservation_id }
55

66
return
77
end

examples/activities/trip/cancel_hotel_activity.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module Trip
22
class CancelHotelActivity < Temporal::Activity
33
def execute(reservation_id)
4-
logger.info "Cancelling hotel reservation: #{reservation_id}"
4+
logger.info "Cancelling hotel reservation" { reservation_id: reservation_id }
55

66
return
77
end

examples/activities/trip/make_payment_activity.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ class InsufficientFunds < Temporal::ActivityException; end
1010
)
1111

1212
def execute(trip_id, total)
13-
logger.info "Processing payment for #{total} (trip_id #{trip_id})"
13+
logger.info "Processing payment", { amount: total, trip_id: trip_id })
1414

15-
raise InsufficientFunds, "Unable to charge #{total}"
15+
raise InsufficientFunds, "Unable to charge: #{total}"
1616
end
1717
end
1818
end

examples/activities/trip/rent_car_activity.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module Trip
22
class RentCarActivity < Temporal::Activity
33
def execute(trip_id)
4-
logger.info "Renting a car for trip #{trip_id}"
4+
logger.info "Renting a car", { trip_id: trip_id }
55

66
return { reservation_id: SecureRandom.uuid, total: rand(0..1000) / 10.0 }
77
end

examples/bin/reset

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ require_relative '../init'
44
namespace, workflow_id, run_id = ARGV
55

66
run_id = Temporal.reset_workflow(namespace, workflow_id, run_id)
7-
Temporal.logger.info "Reset workflow: #{run_id}"
7+
Temporal.logger.info "Reset workflow", { workflow_id: workflow_id, run_id: run_id }

examples/bin/trigger

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ workflow_id = SecureRandom.uuid
1111
input = args.map { |arg| Integer(arg) rescue arg }
1212

1313
run_id = Temporal.start_workflow(workflow_class, *input, options: { workflow_id: workflow_id })
14-
Temporal.logger.info "Started workflow: #{workflow_id} / #{run_id}"
14+
Temporal.logger.info "Started workflow", { workflow_id: workflow_id, run_id: run_id }

examples/middleware/logging_middleware.rb

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ def initialize(app_name)
66
def call(metadata)
77
entity_name = name_from(metadata)
88
entity_type = type_from(metadata)
9-
Temporal.logger.info("[#{app_name}]: Started #{entity_name} #{entity_type}")
9+
Temporal.logger.info("[#{app_name}]: Started", metadata: metadata.to_h)
1010

1111
yield
1212

13-
Temporal.logger.info("[#{app_name}]: Finished #{entity_name} #{entity_type}")
13+
Temporal.logger.info("[#{app_name}]: Finished", metadata: metadata.to_h)
1414
rescue StandardError
15-
Temporal.logger.error("[#{app_name}]: Error #{entity_name}")
15+
Temporal.logger.error("[#{app_name}]: Error", metadata: metadata.to_h)
1616

1717
raise
1818
end

examples/workflows/check_workflow.rb

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,17 @@ def execute
99
logger.info(' => Future 1 is ready!') if future_1.ready?
1010

1111
future_1.done do |result|
12-
logger.info("X: future_1 completed at #{workflow.now.strftime('%H:%M:%S.%L')}")
12+
logger.info("X: future_1 completed", { time: workflow.now.strftime('%H:%M:%S.%L') })
1313
end
1414

1515
result = future_2.get
16-
logger.info("X: future_2 completed at #{workflow.now.strftime('%H:%M:%S.%L')}")
16+
logger.info("X: future_2 completed", { time: workflow.now.strftime('%H:%M:%S.%L') })
1717

1818
logger.info(' => Future 3 is ready!') if future_3.ready?
1919
logger.info(' => Future 2 is ready!') if future_2.ready?
2020

2121
future_3.done do |result|
22-
logger.info("X: future_3 completed at #{workflow.now.strftime('%H:%M:%S.%L')}")
22+
logger.info("X: future_3 completed", { time: workflow.now.strftime('%H:%M:%S.%L') })
2323
end
2424

2525
workflow.wait_for_all(future_1, future_2, future_3)

examples/workflows/failing_activities_workflow.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def execute(count)
1919
end
2020
workflow.wait_for_all(*futures)
2121

22-
logger.info("#{futures.count(&:failed?)} activites of #{count} failed")
22+
logger.info("Activities failed", { total: count, failed: futures.count(&:failed?) })
2323

2424
{
2525
finished: futures.count(&:finished?),

examples/workflows/long_workflow.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ def execute(cycles = 10, interval = 1)
55
future = LongRunningActivity.execute(cycles, interval)
66

77
workflow.on_signal do |signal, input|
8-
logger.warn "Signal received: #{signal}"
8+
logger.warn "Signal received", { signal: signal }
99
future.cancel
1010
end
1111

lib/temporal/activity/context.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def async_token
3131
end
3232

3333
def heartbeat(details = nil)
34-
logger.debug('Activity heartbeat')
34+
logger.debug("Activity heartbeat", metadata.to_h)
3535
client.record_activity_task_heartbeat(task_token: task_token, details: details)
3636
end
3737

lib/temporal/activity/poller.rb

+3-3
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def start
2727

2828
def stop_polling
2929
@shutting_down = true
30-
Temporal.logger.info('Shutting down activity poller')
30+
Temporal.logger.info('Shutting down activity poller', { namespace: namespace, task_queue: task_queue })
3131
end
3232

3333
def cancel_pending_requests
@@ -57,7 +57,7 @@ def poll_loop
5757

5858
return if shutting_down?
5959

60-
Temporal.logger.debug("Polling activity task queue (#{namespace} / #{task_queue})")
60+
Temporal.logger.debug("Polling activity task queue", { namespace: namespace, task_queue: task_queue })
6161

6262
task = poll_for_task
6363
next unless task&.activity_type
@@ -69,7 +69,7 @@ def poll_loop
6969
def poll_for_task
7070
client.poll_activity_task_queue(namespace: namespace, task_queue: task_queue)
7171
rescue StandardError => error
72-
Temporal.logger.error("Unable to poll activity task queue: #{error.inspect}")
72+
Temporal.logger.error("Unable to poll activity task queue", { namespace: namespace, task_queue: task_queue, error: error.inspect })
7373

7474
Temporal::ErrorHandler.handle(error)
7575

lib/temporal/activity/task_processor.rb

+8-8
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ def initialize(task, namespace, activity_lookup, client, middleware_chain)
2121
def process
2222
start_time = Time.now
2323

24-
Temporal.logger.info("Processing activity task for #{activity_name}")
24+
Temporal.logger.debug("Processing Activity task", metadata.to_h)
2525
Temporal.metrics.timing('activity_task.queue_time', queue_time_ms, activity: activity_name)
2626

27+
context = Activity::Context.new(client, metadata)
28+
2729
if !activity_class
2830
raise ActivityNotRegistered, 'Activity is not registered with this worker'
2931
end
3032

31-
context = Activity::Context.new(client, metadata)
32-
3333
result = middleware_chain.invoke(metadata) do
3434
activity_class.execute_in_context(context, parse_payload(task.input))
3535
end
@@ -43,7 +43,7 @@ def process
4343
ensure
4444
time_diff_ms = ((Time.now - start_time) * 1000).round
4545
Temporal.metrics.timing('activity_task.latency', time_diff_ms, activity: activity_name)
46-
Temporal.logger.debug("Activity task processed in #{time_diff_ms}ms")
46+
Temporal.logger.debug("Activity task processed", metadata.to_h.merge(execution_time: time_diff_ms))
4747
end
4848

4949
private
@@ -57,19 +57,19 @@ def queue_time_ms
5757
end
5858

5959
def respond_completed(result)
60-
Temporal.logger.info("Activity #{activity_name} completed")
60+
Temporal.logger.info("Activity task completed", metadata.to_h)
6161
client.respond_activity_task_completed(task_token: task_token, result: result)
6262
rescue StandardError => error
63-
Temporal.logger.error("Unable to complete Activity #{activity_name}: #{error.inspect}")
63+
Temporal.logger.error("Unable to complete Activity", metadata.to_h.merge(error: error.inspect))
6464

6565
Temporal::ErrorHandler.handle(error, metadata: metadata)
6666
end
6767

6868
def respond_failed(error)
69-
Temporal.logger.error("Activity #{activity_name} failed with: #{error.inspect}")
69+
Temporal.logger.error("Activity task failed", metadata.to_h.merge(error: error.inspect))
7070
client.respond_activity_task_failed(task_token: task_token, exception: error)
7171
rescue StandardError => error
72-
Temporal.logger.error("Unable to fail Activity #{activity_name}: #{error.inspect}")
72+
Temporal.logger.error("Unable to fail Activity task", metadata.to_h.merge(error: error.inspect))
7373

7474
Temporal::ErrorHandler.handle(error, metadata: metadata)
7575
end

lib/temporal/configuration.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
require 'logger'
1+
require 'temporal/logger'
22
require 'temporal/metrics_adapters/null'
33

44
module Temporal
@@ -26,7 +26,7 @@ class Configuration
2626

2727
def initialize
2828
@client_type = :grpc
29-
@logger = Logger.new(STDOUT, progname: 'temporal_client')
29+
@logger = Temporal::Logger.new(STDOUT, progname: 'temporal_client')
3030
@metrics_adapter = MetricsAdapters::Null.new
3131
@timeouts = DEFAULT_TIMEOUTS
3232
@namespace = DEFAULT_NAMESPACE

lib/temporal/error_handler.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ def self.handle(error, metadata: nil)
44
Temporal.configuration.error_handlers.each do |handler|
55
handler.call(error, metadata: metadata)
66
rescue StandardError => e
7-
Temporal.logger.error("Error handler failed: #{e.inspect}")
7+
Temporal.logger.error("Error handler failed", { error: e.inspect })
88
end
99
end
1010
end

lib/temporal/logger.rb

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
require 'logger'
2+
3+
module Temporal
4+
class Logger < ::Logger
5+
SEVERITIES = %i[debug info warn error fatal unknown].freeze
6+
7+
SEVERITIES.each do |severity|
8+
define_method severity do |message, data = {}|
9+
super(message.to_s + ' ' + Oj.dump(data, mode: :strict))
10+
end
11+
end
12+
13+
def log(severity, message, data = {})
14+
add(severity, message.to_s + ' ' + Oj.dump(data, mode: :strict))
15+
end
16+
end
17+
end

lib/temporal/metadata/activity.rb

+1-2
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ def to_h
3232
'workflow_run_id' => workflow_run_id,
3333
'activity_id' => id,
3434
'activity_name' => name,
35-
'attempt' => attempt,
36-
'heartbeat_details' => heartbeat_details
35+
'attempt' => attempt
3736
}
3837
end
3938
end

lib/temporal/saga/concern.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def run_saga(&block)
1111

1212
Result.new(true)
1313
rescue StandardError => error # TODO: is there a need for a specialized error here?
14-
logger.error("Saga execution aborted: #{error.inspect}")
14+
logger.error("Saga execution aborted", { error: error.inspect })
1515
logger.debug(error.backtrace.join("\n"))
1616

1717
saga.compensate

lib/temporal/workflow.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def self.execute_in_context(context, input)
1717

1818
context.complete(result) unless context.completed?
1919
rescue StandardError, ScriptError => error
20-
Temporal.logger.error("Workflow execution failed with: #{error.inspect}")
20+
Temporal.logger.error("Workflow execution failed", context.metadata.to_h.merge(error: error.inspect))
2121
Temporal.logger.debug(error.backtrace.join("\n"))
2222

2323
Temporal::ErrorHandler.handle(error, metadata: context.metadata)

lib/temporal/workflow/poller.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def poll_loop
5757

5858
return if shutting_down?
5959

60-
Temporal.logger.debug("Polling worklow task queue (#{namespace} / #{task_queue})")
60+
Temporal.logger.debug("Polling Worklow task queue", { namespace: namespace, task_queue: task_queue })
6161

6262
task = poll_for_task
6363
next unless task&.workflow_type
@@ -69,7 +69,7 @@ def poll_loop
6969
def poll_for_task
7070
client.poll_workflow_task_queue(namespace: namespace, task_queue: task_queue)
7171
rescue StandardError => error
72-
Temporal.logger.error("Unable to poll workflow task queue: #{error.inspect}")
72+
Temporal.logger.error("Unable to poll Workflow task queue", { namespace: namespace, task_queue: task_queue, error: error.inspect })
7373

7474
Temporal::ErrorHandler.handle(error)
7575

lib/temporal/workflow/replay_aware_logger.rb

+4-4
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@ def initialize(main_logger, replay = true)
1111
end
1212

1313
SEVERITIES.each do |severity|
14-
define_method severity do |message|
14+
define_method severity do |message, data = {}|
1515
return if replay?
1616

17-
main_logger.public_send(severity, message)
17+
main_logger.public_send(severity, message, data)
1818
end
1919
end
2020

21-
def log(severity, message)
21+
def log(severity, message, data = {})
2222
return if replay?
2323

24-
main_logger.log(severity, message)
24+
main_logger.log(severity, message, data)
2525
end
2626

2727
private

lib/temporal/workflow/task_processor.rb

+7-10
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def initialize(task, namespace, workflow_lookup, client, middleware_chain)
2323
def process
2424
start_time = Time.now
2525

26-
Temporal.logger.info("Processing a workflow task for #{workflow_name}")
26+
Temporal.logger.debug("Processing Workflow task", metadata.to_h)
2727
Temporal.metrics.timing('workflow_task.queue_time', queue_time_ms, workflow: workflow_name)
2828

2929
if !workflow_class
@@ -40,16 +40,13 @@ def process
4040

4141
complete_task(commands)
4242
rescue StandardError => error
43-
fail_task(error)
44-
45-
Temporal.logger.error("Workflow task for #{workflow_name} failed with: #{error.inspect}")
46-
Temporal.logger.debug(error.backtrace.join("\n"))
47-
4843
Temporal::ErrorHandler.handle(error, metadata: metadata)
44+
45+
fail_task(error)
4946
ensure
5047
time_diff_ms = ((Time.now - start_time) * 1000).round
5148
Temporal.metrics.timing('workflow_task.latency', time_diff_ms, workflow: workflow_name)
52-
Temporal.logger.debug("Workflow task processed in #{time_diff_ms}ms")
49+
Temporal.logger.debug("Workflow task processed", metadata.to_h.merge(execution_time: time_diff_ms))
5350
end
5451

5552
private
@@ -82,13 +79,13 @@ def fetch_full_history
8279
end
8380

8481
def complete_task(commands)
85-
Temporal.logger.info("Workflow task for #{workflow_name} completed")
82+
Temporal.logger.info("Workflow task completed", metadata.to_h)
8683

8784
client.respond_workflow_task_completed(task_token: task_token, commands: commands)
8885
end
8986

9087
def fail_task(error)
91-
Temporal.logger.error("Workflow task for #{workflow_name} failed with: #{error.inspect}")
88+
Temporal.logger.error("Workflow task failed", metadata.to_h.merge(error: error.inspect))
9289
Temporal.logger.debug(error.backtrace.join("\n"))
9390

9491
# Only fail the workflow task on the first attempt. Subsequent failures of the same workflow task
@@ -102,7 +99,7 @@ def fail_task(error)
10299
exception: error
103100
)
104101
rescue StandardError => error
105-
Temporal.logger.error("Unable to fail Workflow task #{workflow_name}: #{error.inspect}")
102+
Temporal.logger.error("Unable to fail Workflow task", metadata.to_h.merge(error: error.inspect))
106103

107104
Temporal::ErrorHandler.handle(error, metadata: metadata)
108105
end

0 commit comments

Comments
 (0)