Skip to content

Commit afda194

Browse files
nic-6443RevolyssupAlinsRan
authored
feat: add more spans to opentelemetry plugin (#12686)
Signed-off-by: Nic <[email protected]> Co-authored-by: Ashish Tiwari <[email protected]> Co-authored-by: AlinsRan <[email protected]>
1 parent 85bb628 commit afda194

File tree

15 files changed

+857
-52
lines changed

15 files changed

+857
-52
lines changed

apisix/cli/config.lua

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ local _M = {
8484
neg_ttl = 60,
8585
neg_count = 512
8686
}
87-
}
87+
},
88+
tracing = false
8889
},
8990
nginx_config = {
9091
error_log = "logs/error.log",

apisix/core/response.lua

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
--
2020
-- @module core.response
2121

22+
local tracer = require("apisix.tracer")
2223
local encode_json = require("cjson.safe").encode
2324
local ngx = ngx
2425
local arg = ngx.arg
@@ -86,6 +87,9 @@ function resp_exit(code, ...)
8687
end
8788

8889
if code then
90+
if code >= 400 then
91+
tracer.finish_all(ngx.ctx, tracer.status.ERROR, "response code " .. code)
92+
end
8993
return ngx_exit(code)
9094
end
9195
end

apisix/init.lua

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ local debug = require("apisix.debug")
4747
local pubsub_kafka = require("apisix.pubsub.kafka")
4848
local resource = require("apisix.resource")
4949
local trusted_addresses_util = require("apisix.utils.trusted-addresses")
50+
local tracer = require("apisix.tracer")
51+
5052
local discovery = require("apisix.discovery.init").discovery
5153
local ngx = ngx
5254
local get_method = ngx.req.get_method
@@ -202,6 +204,9 @@ function _M.ssl_client_hello_phase()
202204
local ngx_ctx = ngx.ctx
203205
local api_ctx = core.tablepool.fetch("api_ctx", 0, 32)
204206
ngx_ctx.api_ctx = api_ctx
207+
api_ctx.ngx_ctx = ngx_ctx
208+
209+
local span = tracer.start(ngx_ctx, "ssl_client_hello_phase", tracer.kind.server)
205210

206211
local ok, err = router.router_ssl.match_and_set(api_ctx, true, sni)
207212

@@ -215,18 +220,23 @@ function _M.ssl_client_hello_phase()
215220
core.log.error("failed to fetch ssl config: ", err)
216221
end
217222
core.log.error("failed to match any SSL certificate by SNI: ", sni)
223+
span:set_status(tracer.status.ERROR, "no matched SSL")
224+
span:finish(ngx_ctx)
218225
ngx_exit(-1)
219226
end
220227

221228
ok, err = apisix_ssl.set_protocols_by_clienthello(ngx_ctx.matched_ssl.value.ssl_protocols)
222229
if not ok then
223230
core.log.error("failed to set ssl protocols: ", err)
231+
span:set_status(tracer.status.ERROR, "failed set protocols")
232+
span:finish(ngx_ctx)
224233
ngx_exit(-1)
225234
end
226235

227236
-- in stream subsystem, ngx.ssl.server_name() return hostname of ssl session in preread phase,
228237
-- so that we can't get real SNI without recording it in ngx.ctx during client_hello phase
229238
ngx.ctx.client_hello_sni = sni
239+
span:finish(ngx_ctx)
230240
end
231241

232242

@@ -480,7 +490,6 @@ local function common_phase(phase_name)
480490
end
481491

482492

483-
484493
function _M.handle_upstream(api_ctx, route, enable_websocket)
485494
-- some plugins(ai-proxy...) request upstream by http client directly
486495
if api_ctx.bypass_nginx_upstream then
@@ -677,9 +686,12 @@ function _M.http_access_phase()
677686
-- always fetch table from the table pool, we don't need a reused api_ctx
678687
local api_ctx = core.tablepool.fetch("api_ctx", 0, 32)
679688
ngx_ctx.api_ctx = api_ctx
689+
api_ctx.ngx_ctx = ngx_ctx
680690

681691
core.ctx.set_vars_meta(api_ctx)
682692

693+
local span = tracer.start(ngx_ctx, "apisix.phase.access", tracer.kind.server)
694+
683695
if not verify_https_client(api_ctx) then
684696
return core.response.exit(400)
685697
end
@@ -717,10 +729,13 @@ function _M.http_access_phase()
717729

718730
handle_x_forwarded_headers(api_ctx)
719731

732+
local match_span = tracer.start(ngx_ctx, "http_router_match", tracer.kind.internal)
720733
router.router_http.match(api_ctx)
721734

722735
local route = api_ctx.matched_route
723736
if not route then
737+
match_span:set_status(tracer.status.ERROR, "no matched route")
738+
match_span:finish(ngx.ctx)
724739
-- run global rule when there is no matching route
725740
local global_rules, conf_version = apisix_global_rules.global_rules()
726741
plugin.run_global_rules(api_ctx, global_rules, conf_version, nil)
@@ -729,6 +744,7 @@ function _M.http_access_phase()
729744
return core.response.exit(404,
730745
{error_msg = "404 Route Not Found"})
731746
end
747+
match_span:finish(ngx_ctx)
732748

733749
core.log.info("matched route: ",
734750
core.json.delay_encode(api_ctx.matched_route, true))
@@ -785,7 +801,6 @@ function _M.http_access_phase()
785801
else
786802
local plugins = plugin.filter(api_ctx, route)
787803
api_ctx.plugins = plugins
788-
789804
plugin.run_plugin("rewrite", plugins, api_ctx)
790805
if api_ctx.consumer then
791806
local changed
@@ -821,6 +836,7 @@ function _M.http_access_phase()
821836
end
822837
plugin.run_plugin("access", plugins, api_ctx)
823838
end
839+
span:finish(ngx_ctx)
824840

825841
_M.handle_upstream(api_ctx, route, enable_websocket)
826842

@@ -879,6 +895,8 @@ end
879895

880896

881897
function _M.http_header_filter_phase()
898+
local ngx_ctx = ngx.ctx
899+
local span = tracer.start(ngx_ctx, "apisix.phase.header_filter", tracer.kind.server)
882900
core.response.set_header("Server", ver_header)
883901

884902
local up_status = get_var("upstream_status")
@@ -901,6 +919,9 @@ function _M.http_header_filter_phase()
901919
end
902920
core.response.set_header("Apisix-Plugins", core.table.concat(deduplicate, ", "))
903921
end
922+
span:finish(ngx_ctx)
923+
924+
tracer.start(ngx_ctx, "apisix.phase.body_filter", tracer.kind.server)
904925
end
905926

906927

@@ -1056,6 +1077,7 @@ function _M.http_log_phase()
10561077
if not api_ctx then
10571078
return
10581079
end
1080+
tracer.finish_all(api_ctx.ngx_ctx)
10591081

10601082
if not api_ctx.var.apisix_upstream_response_time or
10611083
api_ctx.var.apisix_upstream_response_time == "" then
@@ -1081,6 +1103,9 @@ function _M.http_log_phase()
10811103
core.tablepool.release("matched_route_record", api_ctx.curr_req_matched)
10821104
end
10831105

1106+
tracer.release(api_ctx.ngx_ctx)
1107+
api_ctx.ngx_ctx = nil
1108+
10841109
core.tablepool.release("api_ctx", api_ctx)
10851110
end
10861111

apisix/plugin.lua

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ local tostring = tostring
3838
local error = error
3939
local getmetatable = getmetatable
4040
local setmetatable = setmetatable
41+
local tracer = require("apisix.tracer")
4142
-- make linter happy to avoid error: getting the Lua global "load"
4243
-- luacheck: globals load, ignore lua_load
4344
local lua_load = load
@@ -1228,7 +1229,10 @@ function _M.run_plugin(phase, plugins, api_ctx)
12281229
plugin_run = true
12291230
run_meta_pre_function(conf, api_ctx, plugins[i]["name"])
12301231
api_ctx._plugin_name = plugins[i]["name"]
1232+
local span = tracer.start(api_ctx.ngx_ctx, "apisix.phase." .. phase
1233+
.. ".plugins." .. api_ctx._plugin_name)
12311234
phase_func(conf, api_ctx)
1235+
span:finish(api_ctx.ngx_ctx)
12321236
api_ctx._plugin_name = nil
12331237
end
12341238
end
@@ -1301,6 +1305,7 @@ end
13011305

13021306
function _M.run_global_rules(api_ctx, global_rules, conf_version, phase_name)
13031307
if global_rules and #global_rules > 0 then
1308+
local span = tracer.start(api_ctx.ngx_ctx, "run_global_rules", tracer.kind.internal)
13041309
local orig_conf_type = api_ctx.conf_type
13051310
local orig_conf_version = api_ctx.conf_version
13061311
local orig_conf_id = api_ctx.conf_id
@@ -1335,6 +1340,7 @@ function _M.run_global_rules(api_ctx, global_rules, conf_version, phase_name)
13351340
api_ctx.conf_type = orig_conf_type
13361341
api_ctx.conf_version = orig_conf_version
13371342
api_ctx.conf_id = orig_conf_id
1343+
span:finish(api_ctx.ngx_ctx)
13381344
end
13391345
end
13401346

apisix/plugins/opentelemetry.lua

Lines changed: 73 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ local pairs = pairs
4848
local ipairs = ipairs
4949
local unpack = unpack
5050
local string_format = string.format
51+
local update_time = ngx.update_time
5152

5253
local lrucache = core.lrucache.new({
5354
type = 'plugin', count = 128, ttl = 24 * 60 * 60,
@@ -327,10 +328,17 @@ function _M.rewrite(conf, api_ctx)
327328

328329
local attributes = {
329330
attr.string("net.host.name", vars.host),
331+
-- deprecated attributes
330332
attr.string("http.method", vars.method),
331333
attr.string("http.scheme", vars.scheme),
332334
attr.string("http.target", vars.request_uri),
333335
attr.string("http.user_agent", vars.http_user_agent),
336+
337+
-- new attributes
338+
attr.string("http.request.method", vars.method),
339+
attr.string("url.scheme", vars.scheme),
340+
attr.string("uri.path", vars.uri),
341+
attr.string("user_agent.original", vars.http_user_agent),
334342
}
335343

336344
if api_ctx.curr_req_matched then
@@ -376,48 +384,96 @@ function _M.rewrite(conf, api_ctx)
376384
ngx_var.opentelemetry_span_id = span_context.span_id
377385
end
378386

387+
if not ctx:span():is_recording() and ngx.ctx.tracing then
388+
ngx.ctx.tracing.skip = true
389+
end
390+
379391
api_ctx.otel_context_token = ctx:attach()
380392

381393
-- inject trace context into the headers of upstream HTTP request
382394
trace_context_propagator:inject(ctx, ngx.req)
383395
end
384396

385397

386-
function _M.delayed_body_filter(conf, api_ctx)
387-
if api_ctx.otel_context_token and ngx.arg[2] then
388-
local ctx = context:current()
389-
ctx:detach(api_ctx.otel_context_token)
390-
api_ctx.otel_context_token = nil
398+
local function create_child_span(tracer, parent_span_ctx, spans, span)
399+
if not span or span.finished then
400+
return
401+
end
402+
span.finished = true
403+
local new_span_ctx, new_span = tracer:start(parent_span_ctx, span.name,
404+
{
405+
kind = span.kind,
406+
attributes = span.attributes,
407+
})
408+
new_span.start_time = span.start_time
409+
410+
for _, idx in ipairs(span.child_ids or {}) do
411+
create_child_span(tracer, new_span_ctx, spans, spans[idx])
412+
end
413+
if span.status then
414+
new_span:set_status(span.status.code, span.status.message)
415+
end
416+
new_span:finish(span.end_time)
417+
end
391418

392-
-- get span from current context
393-
local span = ctx:span()
394-
local upstream_status = core.response.get_upstream_status(api_ctx)
395-
if upstream_status and upstream_status >= 500 then
396-
span:set_status(span_status.ERROR,
397-
"upstream response status: " .. upstream_status)
398-
end
399419

400-
span:set_attributes(attr.int("http.status_code", upstream_status))
420+
local function inject_core_spans(root_span_ctx, api_ctx, conf)
421+
local tracing = api_ctx.ngx_ctx.tracing
422+
if not tracing then
423+
return
424+
end
401425

402-
span:finish()
426+
local span = root_span_ctx:span()
427+
428+
local metadata = plugin.plugin_metadata(plugin_name)
429+
local plugin_info = metadata.value
430+
if span and not span:is_recording() then
431+
return
432+
end
433+
local inject_conf = {
434+
sampler = {
435+
name = "always_on",
436+
options = conf.sampler.options
437+
},
438+
additional_attributes = conf.additional_attributes,
439+
additional_header_prefix_attributes = conf.additional_header_prefix_attributes
440+
}
441+
local tracer, err = core.lrucache.plugin_ctx(lrucache, api_ctx, nil,
442+
create_tracer_obj, inject_conf, plugin_info)
443+
if not tracer then
444+
core.log.error("failed to fetch tracer object: ", err)
445+
return
446+
end
447+
448+
if #tracing.spans == 0 then
449+
return
450+
end
451+
span.start_time = tracing.spans[1].start_time
452+
local root_span = tracing.root_span
453+
local spans = tracing.spans
454+
for _, idx in ipairs(root_span.child_ids or {}) do
455+
create_child_span(tracer, root_span_ctx, spans, spans[idx])
403456
end
404457
end
405458

406459

407-
-- body_filter maybe not called because of empty http body response
408-
-- so we need to check if the span has finished in log phase
409460
function _M.log(conf, api_ctx)
410461
if api_ctx.otel_context_token then
411462
-- ctx:detach() is not necessary, because of ctx is stored in ngx.ctx
412463
local upstream_status = core.response.get_upstream_status(api_ctx)
413464

414465
-- get span from current context
415-
local span = context:current():span()
466+
local ctx = context:current()
467+
local span = ctx:span()
416468
if upstream_status and upstream_status >= 500 then
417469
span:set_status(span_status.ERROR,
418470
"upstream response status: " .. upstream_status)
419471
end
420472

473+
inject_core_spans(ctx, api_ctx, conf)
474+
span:set_attributes(attr.int("http.status_code", upstream_status),
475+
attr.int("http.response.status_code", upstream_status))
476+
update_time()
421477
span:finish()
422478
end
423479
end

apisix/secret.lua

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
local require = require
1919
local core = require("apisix.core")
2020
local string = require("apisix.core.string")
21+
local tracer = require("apisix.tracer")
2122

2223
local local_conf = require("apisix.core.config_local").local_conf()
2324

@@ -28,6 +29,7 @@ local byte = string.byte
2829
local type = type
2930
local pcall = pcall
3031
local pairs = pairs
32+
local ngx = ngx
3133

3234
local _M = {}
3335

@@ -148,16 +150,20 @@ local function fetch_by_uri_secret(secret_uri)
148150
return nil, "no secret conf, secret_uri: " .. secret_uri
149151
end
150152

153+
local span = tracer.start(ngx.ctx, "fetch_secret", tracer.kind.client)
151154
local ok, sm = pcall(require, "apisix.secret." .. opts.manager)
152155
if not ok then
153156
return nil, "no secret manager: " .. opts.manager
154157
end
155158

156159
local value, err = sm.get(conf, opts.key)
157160
if err then
161+
span:set_status(tracer.status.ERROR, err)
162+
span:finish(ngx.ctx)
158163
return nil, err
159164
end
160165

166+
span:finish(ngx.ctx)
161167
return value
162168
end
163169

0 commit comments

Comments
 (0)