@@ -32,14 +32,10 @@ import (
32
32
"time"
33
33
)
34
34
35
- var bufferPool = sync.Pool {New : func () interface {} {
36
- return & bytes.Buffer {}
37
- }}
38
-
39
- type ApmServerTransportStatusType string
40
-
41
35
// Constants for the state of the transport used in
42
36
// the backoff implementation.
37
+ type ApmServerTransportStatusType string
38
+
43
39
const (
44
40
Failing ApmServerTransportStatusType = "Failing"
45
41
Pending ApmServerTransportStatusType = "Pending"
@@ -48,30 +44,84 @@ const (
48
44
49
45
// A struct to track the state and status of sending
50
46
// to the APM server. Used in the backoff implementation.
51
- type ApmServerTransportStateType struct {
47
+ type ApmServerTransport struct {
52
48
sync.Mutex
53
- Status ApmServerTransportStatusType
54
- ReconnectionCount int
55
- GracePeriodTimer * time.Timer
49
+ bufferPool sync.Pool
50
+ config * extensionConfig
51
+ AgentDoneSignal chan struct {}
52
+ dataChannel chan AgentData
53
+ client * http.Client
54
+ status ApmServerTransportStatusType
55
+ reconnectionCount int
56
+ gracePeriodTimer * time.Timer
56
57
}
57
58
58
- // The status of transport to the APM server.
59
- //
60
- // This instance of the ApmServerTransportStateType is public for use in tests.
61
- var ApmServerTransportState = ApmServerTransportStateType {
62
- Status : Healthy ,
63
- ReconnectionCount : - 1 ,
59
+ func InitApmServerTransport (config * extensionConfig ) * ApmServerTransport {
60
+ var transport ApmServerTransport
61
+ transport .bufferPool = sync.Pool {New : func () interface {} {
62
+ return & bytes.Buffer {}
63
+ }}
64
+ transport .dataChannel = make (chan AgentData , 100 )
65
+ transport .client = & http.Client {
66
+ Timeout : time .Duration (config .DataForwarderTimeoutSeconds ) * time .Second ,
67
+ Transport : http .DefaultTransport .(* http.Transport ).Clone (),
68
+ }
69
+ transport .config = config
70
+ transport .status = Healthy
71
+ transport .reconnectionCount = - 1
72
+ return & transport
73
+ }
74
+
75
+ // StartBackgroundApmDataForwarding Receive agent data as it comes in and post it to the APM server.
76
+ // Stop checking for, and sending agent data when the function invocation
77
+ // has completed, signaled via a channel.
78
+ func (transport * ApmServerTransport ) ForwardApmData (ctx context.Context ) error {
79
+ if transport .status == Failing {
80
+ return nil
81
+ }
82
+ for {
83
+ select {
84
+ case <- ctx .Done ():
85
+ Log .Debug ("Invocation context cancelled, not processing any more agent data" )
86
+ return nil
87
+ case agentData := <- transport .dataChannel :
88
+ if err := transport .PostToApmServer (ctx , agentData ); err != nil {
89
+ return fmt .Errorf ("error sending to APM server, skipping: %v" , err )
90
+ }
91
+ }
92
+ }
93
+ }
94
+
95
+ // FlushAPMData reads all the apm data in the apm data channel and sends it to the APM server.
96
+ func (transport * ApmServerTransport ) FlushAPMData (ctx context.Context ) {
97
+ if transport .status == Failing {
98
+ Log .Debug ("Flush skipped - Transport failing" )
99
+ return
100
+ }
101
+ Log .Debug ("Flush started - Checking for agent data" )
102
+ for {
103
+ select {
104
+ case agentData := <- transport .dataChannel :
105
+ Log .Debug ("Flush in progress - Processing agent data" )
106
+ if err := transport .PostToApmServer (ctx , agentData ); err != nil {
107
+ Log .Errorf ("Error sending to APM server, skipping: %v" , err )
108
+ }
109
+ default :
110
+ Log .Debug ("Flush ended - No agent data on buffer" )
111
+ return
112
+ }
113
+ }
64
114
}
65
115
66
116
// PostToApmServer takes a chunk of APM agent data and posts it to the APM server.
67
117
//
68
118
// The function compresses the APM agent data, if it's not already compressed.
69
119
// It sets the APM transport status to failing upon errors, as part of the backoff
70
120
// strategy.
71
- func PostToApmServer ( client * http. Client , agentData AgentData , config * extensionConfig , ctx context.Context ) error {
121
+ func ( transport * ApmServerTransport ) PostToApmServer ( ctx context.Context , agentData AgentData ) error {
72
122
// todo: can this be a streaming or streaming style call that keeps the
73
123
// connection open across invocations?
74
- if ! IsTransportStatusHealthyOrPending () {
124
+ if transport . status == Failing {
75
125
return errors .New ("transport status is unhealthy" )
76
126
}
77
127
@@ -83,10 +133,10 @@ func PostToApmServer(client *http.Client, agentData AgentData, config *extension
83
133
r = bytes .NewReader (agentData .Data )
84
134
} else {
85
135
encoding = "gzip"
86
- buf := bufferPool .Get ().(* bytes.Buffer )
136
+ buf := transport . bufferPool .Get ().(* bytes.Buffer )
87
137
defer func () {
88
138
buf .Reset ()
89
- bufferPool .Put (buf )
139
+ transport . bufferPool .Put (buf )
90
140
}()
91
141
gw , err := gzip .NewWriterLevel (buf , gzip .BestSpeed )
92
142
if err != nil {
@@ -101,98 +151,90 @@ func PostToApmServer(client *http.Client, agentData AgentData, config *extension
101
151
r = buf
102
152
}
103
153
104
- req , err := http .NewRequest ("POST" , config .apmServerUrl + endpointURI , r )
154
+ req , err := http .NewRequest ("POST" , transport . config .apmServerUrl + endpointURI , r )
105
155
if err != nil {
106
156
return fmt .Errorf ("failed to create a new request when posting to APM server: %v" , err )
107
157
}
108
158
req .Header .Add ("Content-Encoding" , encoding )
109
159
req .Header .Add ("Content-Type" , "application/x-ndjson" )
110
- if config .apmServerApiKey != "" {
111
- req .Header .Add ("Authorization" , "ApiKey " + config .apmServerApiKey )
112
- } else if config .apmServerSecretToken != "" {
113
- req .Header .Add ("Authorization" , "Bearer " + config .apmServerSecretToken )
160
+ if transport . config .apmServerApiKey != "" {
161
+ req .Header .Add ("Authorization" , "ApiKey " + transport . config .apmServerApiKey )
162
+ } else if transport . config .apmServerSecretToken != "" {
163
+ req .Header .Add ("Authorization" , "Bearer " + transport . config .apmServerSecretToken )
114
164
}
115
165
116
- Log .Debug ("Sending data chunk to APM Server " )
117
- resp , err := client .Do (req )
166
+ Log .Debug ("Sending data chunk to APM server " )
167
+ resp , err := transport . client .Do (req )
118
168
if err != nil {
119
- SetApmServerTransportState (Failing , ctx )
169
+ transport . SetApmServerTransportState (ctx , Failing )
120
170
return fmt .Errorf ("failed to post to APM server: %v" , err )
121
171
}
122
172
123
173
//Read the response body
124
174
defer resp .Body .Close ()
125
175
body , err := ioutil .ReadAll (resp .Body )
126
176
if err != nil {
127
- SetApmServerTransportState (Failing , ctx )
177
+ transport . SetApmServerTransportState (ctx , Failing )
128
178
return fmt .Errorf ("failed to read the response body after posting to the APM server" )
129
179
}
130
180
131
- SetApmServerTransportState (Healthy , ctx )
181
+ transport . SetApmServerTransportState (ctx , Healthy )
132
182
Log .Debug ("Transport status set to healthy" )
133
183
Log .Debugf ("APM server response body: %v" , string (body ))
134
184
Log .Debugf ("APM server response status code: %v" , resp .StatusCode )
135
185
return nil
136
186
}
137
187
138
- // IsTransportStatusHealthyOrPending returns true if the APM server transport status is
139
- // healthy or pending, and false otherwise.
140
- //
141
- // This function is public for use in tests.
142
- func IsTransportStatusHealthyOrPending () bool {
143
- return ApmServerTransportState .Status != Failing
144
- }
145
-
146
188
// SetApmServerTransportState takes a state of the APM server transport and updates
147
189
// the current state of the transport. For a change to a failing state, the grace period
148
190
// is calculated and a go routine is started that waits for that period to complete
149
191
// before changing the status to "pending". This would allow a subsequent send attempt
150
192
// to the APM server.
151
193
//
152
194
// This function is public for use in tests.
153
- func SetApmServerTransportState ( status ApmServerTransportStatusType , ctx context.Context ) {
195
+ func ( transport * ApmServerTransport ) SetApmServerTransportState ( ctx context.Context , status ApmServerTransportStatusType ) {
154
196
switch status {
155
197
case Healthy :
156
- ApmServerTransportState .Lock ()
157
- ApmServerTransportState . Status = status
158
- Log .Debugf ("APM Server Transport status set to %s" , status )
159
- ApmServerTransportState . ReconnectionCount = - 1
160
- ApmServerTransportState .Unlock ()
198
+ transport .Lock ()
199
+ transport . status = status
200
+ Log .Debugf ("APM server Transport status set to %s" , transport . status )
201
+ transport . reconnectionCount = - 1
202
+ transport .Unlock ()
161
203
case Failing :
162
- ApmServerTransportState .Lock ()
163
- ApmServerTransportState . Status = status
164
- Log .Debugf ("APM Server Transport status set to %s" , status )
165
- ApmServerTransportState . ReconnectionCount ++
166
- ApmServerTransportState . GracePeriodTimer = time .NewTimer (computeGracePeriod ())
167
- Log .Debugf ("Grace period entered, reconnection count : %d" , ApmServerTransportState . ReconnectionCount )
204
+ transport .Lock ()
205
+ transport . status = status
206
+ Log .Debugf ("APM server Transport status set to %s" , transport . status )
207
+ transport . reconnectionCount ++
208
+ transport . gracePeriodTimer = time .NewTimer (transport . computeGracePeriod ())
209
+ Log .Debugf ("Grace period entered, reconnection count : %d" , transport . reconnectionCount )
168
210
go func () {
169
211
select {
170
- case <- ApmServerTransportState . GracePeriodTimer .C :
212
+ case <- transport . gracePeriodTimer .C :
171
213
Log .Debug ("Grace period over - timer timed out" )
172
214
case <- ctx .Done ():
173
215
Log .Debug ("Grace period over - context done" )
174
216
}
175
- ApmServerTransportState . Status = Pending
176
- Log .Debugf ("APM Server Transport status set to %s" , status )
177
- ApmServerTransportState .Unlock ()
217
+ transport . status = Pending
218
+ Log .Debugf ("APM server Transport status set to %s" , transport . status )
219
+ transport .Unlock ()
178
220
}()
179
221
default :
180
- Log .Errorf ("Cannot set APM Server Transport status to %s" , status )
222
+ Log .Errorf ("Cannot set APM server Transport status to %s" , status )
181
223
}
182
224
}
183
225
184
226
// ComputeGracePeriod https://github.com/elastic/apm/blob/main/specs/agents/transport.md#transport-errors
185
- func computeGracePeriod () time.Duration {
186
- gracePeriodWithoutJitter := math .Pow (math .Min (float64 (ApmServerTransportState . ReconnectionCount ), 6 ), 2 )
227
+ func ( transport * ApmServerTransport ) computeGracePeriod () time.Duration {
228
+ gracePeriodWithoutJitter := math .Pow (math .Min (float64 (transport . reconnectionCount ), 6 ), 2 )
187
229
jitter := rand .Float64 ()/ 5 - 0.1
188
230
return time .Duration ((gracePeriodWithoutJitter + jitter * gracePeriodWithoutJitter ) * float64 (time .Second ))
189
231
}
190
232
191
233
// EnqueueAPMData adds a AgentData struct to the agent data channel, effectively queueing for a send
192
234
// to the APM server.
193
- func EnqueueAPMData ( agentDataChannel chan AgentData , agentData AgentData ) {
235
+ func ( transport * ApmServerTransport ) EnqueueAPMData ( agentData AgentData ) {
194
236
select {
195
- case agentDataChannel <- agentData :
237
+ case transport . dataChannel <- agentData :
196
238
Log .Debug ("Adding agent data to buffer to be sent to apm server" )
197
239
default :
198
240
Log .Warn ("Channel full: dropping a subset of agent data" )
0 commit comments