-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsys_stat_monitors.rb
executable file
·262 lines (190 loc) · 9.89 KB
/
sys_stat_monitors.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#!/usr/bin/env ruby
# Must use Ruby 1.9.2+
require 'optparse'
require 'yaml'
# Initialize
# @options holds the parsed command-line switches and @config the loaded YAML
# config. Both are top-level instance variables so that the methods defined
# further down in this file can read them.
@options = {}
@config = {}
errors = [] # Gather arg validation errors

# Get cmd line options
optparse = OptionParser.new do |opts|
  # Banner
  opts.banner = "Usage: sys_stat_monitors.rb [OPTIONS]"

  # Definition of options
  opts.on('-h', '--help', 'Display help screen') do
    puts opts
    exit
  end

  # Config file (required): must be an existing file whose name ends in .yaml
  @options[:config_file] = nil
  opts.on('-c', '--config FILE', 'path to yaml config file') do |file|
    @options[:config_file] = file
    errors.push("#{file} does not exist") unless File.file?(file)
    errors.push("#{file} does not appear to be a yaml file, must end in .yaml") unless file =~ /\.yaml$/
  end

  # Log file
  @options[:log] = nil
  opts.on('-l', '--log FILE', 'path to log output') do |file|
    @options[:log] = file
  end

  # Enable debug
  @options[:debug] = false
  opts.on('-d', '--debug', 'enable debug logging') do
    @options[:debug] = true
  end

  # Path to monitor commands on remote hosts, must end with '/'
  # The switch has to declare a PATH argument: without the placeholder
  # OptionParser treats -p as a plain flag and passes `true` to the block,
  # which would replace the command path with a boolean.
  @options[:cmd_path] = '~/bin/'
  opts.on('-p', '--path PATH', 'path to monitoring commands') do |path|
    @options[:cmd_path] = path
  end

  # Enable execution
  @options[:execute] = true
  opts.on('-n', '--no_execute', "only ingest config and don't execute") do
    @options[:execute] = false
  end
end
optparse.parse!

errors.push("Must specify config file") if @options[:config_file].nil?

# Default the log file name to the current epoch seconds when -l was not given
@options[:log] ||= "#{Time.now.to_i}.log"

# Print every validation error, then stop before any work is done.
# NOTE(review): this exits with status 0 even though validation failed;
# confirm whether callers would prefer a non-zero status.
unless errors.empty?
  errors.each { |err| puts err }
  exit
end
# METHODS
# Open the run log for writing and return the File object.
#
# An existing file at the same path is truncated ('w' mode). Sync mode is
# enabled so every message is written straight through rather than held in
# Ruby's IO buffer: this script forks child processes that share the log, and
# a forked child inherits a copy of any unflushed buffer, which it may write
# out again when it exits (duplicating log lines).
#
# log - path of the log file to create.
#
# Returns the opened File. It is not closed here.
def start_log(log)
  file = File.open(log, 'w')
  file.sync = true
  file
end
# Write a message to both the log file (@log) and standard output.
#
# msg - the text to emit; each destination receives it via puts.
#
# Returns nil.
def info_msg(msg)
  [@log, $stdout].each { |sink| sink.puts(msg) }
  nil
end
# Write a debug message to the log file (@log) and standard output, but only
# when debug logging is switched on in @options[:debug].
#
# msg - the text to emit.
#
# Returns nil.
def debug_msg(msg)
  return unless @options[:debug]

  @log.puts(msg)
  $stdout.puts(msg)
end
# Load the YAML config file and keep the result in @config.
#
# config - path to the YAML file.
#
# The parsed structure is stored unchanged and echoed through debug_msg.
def parse_config(config)
  @config = YAML.load_file(config)
  debug_msg("CONFIG: " + @config.inspect)
end
# Run a monitoring command for a host and redirect its output to a log file.
#
# user     - account used for ssh, or for `sudo su -` when host is 'localhost'.
# host     - key into @config[:hosts]; its :ssh_port is used when present,
#            otherwise port '22'.
# cmd      - command line to run.
# cmd_log  - file the shell redirects the command's stdout into.
# cmd_path - prefix placed directly in front of cmd (defaults to
#            @options[:cmd_path]); nil results in no prefix.
# forked   - when truthy the command runs in a forked child process.
#
# Returns the child's pid when forked, otherwise the backtick result of the
# shell line.
def run_remote_cmd(user, host, cmd, cmd_log, cmd_path=@options[:cmd_path], forked=true)
  ssh_port = @config[:hosts][host][:ssh_port] || '22'
  debug_msg("exec: run_remote_cmd(#{user}, #{host}, #{cmd}, #{cmd_log}, #{cmd_path}, #{forked})")

  # localhost is reached through sudo, everything else through ssh
  cmd_type = if host == 'localhost'
               "sudo su - #{user} -c"
             else
               "ssh -p #{ssh_port} #{user}@#{host}"
             end
  debug_msg("CMD: #{cmd_type} '#{cmd_path}#{cmd}'")

  shell_line = "#{cmd_type} \"#{cmd_path}#{cmd}\" > #{cmd_log}"
  return `#{shell_line}` unless forked

  fork { `#{shell_line}` }
end
# Block until the longest-running monitor(s) have exited; detach the rest.
#
# pid_times - Hash of pid => total expected run time. The entries with the
#             largest time are removed from this hash in place; what remains
#             are the pids that get detached.
#
# Every pid whose time equals the maximum is waited on with Process.waitpid;
# all other pids are handed to Process.detach.
# This may be useless and we may be able to just wait on all pids.
#
# Returns the Hash of pids that were waited on.
def do_wait(pid_times)
  debug_msg("in pid_times: #{pid_times.inspect}")

  longest = pid_times.values.max
  highest_waits = pid_times.select { |_pid, total| total == longest }
  debug_msg("highest_waits: #{highest_waits.inspect}")

  # Drop the longest runners so only the detachable pids are left
  pid_times.reject! { |_pid, total| total == longest }
  debug_msg("out pid_times: #{pid_times.inspect}")

  # Don't wait for these pids
  pid_times.each_key do |pid|
    debug_msg("Detaching pid: #{pid}")
    Process.detach(pid)
  end

  # Wait for whatever has the longest expected run time
  highest_waits.each_key do |pid|
    debug_msg("Waiting for pid: #{pid}")
    Process.waitpid(pid)
  end
end
# Start monitors
#
# Launches the configured monitoring commands for every host in
# @config[:hosts] and blocks until all host children have exited.
#
# Process layout (each level is a fork):
#   main        -> one child per host; main only waits after all hosts have
#                  been started, so hosts run concurrently
#   host child  -> one child per phase; each phase child is waited on before
#                  the next phase is started, so phases run one after another
#   phase child -> one child per entry in the host's :pids list, waited on
#                  after all of them have been started
#   pid child   -> one forked command per enabled monitor (:mem, :net, :cpu,
#                  :db), started through run_remote_cmd
#
# Config keys read here: :base_log_name, and per host :sysstat, :phases,
# :pids, :user, :http_port, :sys_user, :sys_pass and :schemas. Each phase may
# contain :mem, :net, :cpu and :db sections, each with :interval and :amount.
#
# The local hash p records the pids returned by the forks. Anything written to
# p inside a forked block lives only in that child's copy of the hash.
def start_monitors
p = {}
p[:host] = {}
# Per host
# i only numbers the hosts in the debug output
i = 1
@config[:hosts].each_key do |host|
debug_msg("Working with host [#{host}]")
p[:host][host] = {}
# Per-pid cpu command: pidstat when the host's :sysstat value is "9", sar -x otherwise
# NOTE(review): presumably :sysstat is the installed sysstat major version — confirm
p[:host][host][:cpu_pid_cmd] = (@config[:hosts][host][:sysstat] == "9" ? "pidstat -u -I -p" : "sar -u -x")
p[:host][host][:phase] = {}
# Host fork: everything for this host happens in this child
p[:host][host][:fpid] = fork do
debug_msg("#{i}> p in: #{p.inspect}")
debug_msg("#{i}> In host [#{host}] fpid fork, pid [#{p[:host][host].inspect}]")
@config[:hosts][host][:phases].each_key do |phase|
debug_msg("Working with phase [#{phase}]")
p[:host][host][:phase][phase] = {}
p[:host][host][:phase][phase][:pids] = {}
p[:host][host][:phase][phase][:fpid] = fork do # start phase fork
@config[:hosts][host][:pids].each do |pid|
debug_msg("Working with pid [#{pid}]")
p[:host][host][:phase][phase][:pids][pid] = {}
# :total_time maps each monitor's fork pid to interval * amount; it is handed to do_wait below
p[:host][host][:phase][phase][:pids][pid][:total_time] = {}
p[:host][host][:phase][phase][:pids][pid][:fpid] = fork do
info_msg("Launching phase #{phase} monitors on #{host} for pid #{pid}... (#{Time.now.to_s})")
# NOTE(review): none of the log file names below contain the pid, so with more
# than one entry in :pids the monitors of a phase appear to redirect into the
# same files — confirm whether that is intended.
if(@config[:hosts][host][:phases][phase][:mem])
# Fork memory command
# The pid value 'ALL' selects sar -r; any other value is passed to mem-stat.plx
# NOTE(review): this call uses the default cmd_path prefix even for sar, whereas
# the cpu call below passes nil — confirm which is intended.
mem_cmd = (pid == 'ALL' ? "sar -r #{@config[:hosts][host][:phases][phase][:mem][:interval]} #{@config[:hosts][host][:phases][phase][:mem][:amount]}" :
"mem-stat.plx #{pid} #{@config[:hosts][host][:phases][phase][:mem][:interval]} #{@config[:hosts][host][:phases][phase][:mem][:amount]}")
p[:host][host][:phase][phase][:pids][pid][:mem] = run_remote_cmd(@config[:hosts][host][:user], host, mem_cmd,
"#{@config[:base_log_name]}-#{host}-phase#{phase}-mem_stats.log"
)
debug_msg("#{host}: Forked mem cmd pid return: [#{p[:host][host][:phase][phase][:pids][pid][:mem]}]")
# Store total mem monitoring time
p[:host][host][:phase][phase][:pids][pid][:total_time][p[:host][host][:phase][phase][:pids][pid][:mem]] = @config[:hosts][host][:phases][phase][:mem][:interval].to_i * @config[:hosts][host][:phases][phase][:mem][:amount].to_i
end
if(@config[:hosts][host][:phases][phase][:net])
# Forked net cmd
# net-mon.plx receives interval, amount and the host's :http_port
p[:host][host][:phase][phase][:pids][pid][:net] = run_remote_cmd(@config[:hosts][host][:user], host,
"net-mon.plx #{@config[:hosts][host][:phases][phase][:net][:interval]} #{@config[:hosts][host][:phases][phase][:net][:amount]} #{@config[:hosts][host][:http_port]}",
"#{@config[:base_log_name]}-#{host}-phase#{phase}-net_mon.log"
)
debug_msg("#{host}: Forked net cmd pid return: [#{p[:host][host][:phase][phase][:pids][pid][:net]}]")
# Store total net monitoring time
p[:host][host][:phase][phase][:pids][pid][:total_time][p[:host][host][:phase][phase][:pids][pid][:net]] = @config[:hosts][host][:phases][phase][:net][:interval].to_i * @config[:hosts][host][:phases][phase][:net][:amount].to_i
end
if(@config[:hosts][host][:phases][phase][:cpu])
# Forked cpu cmd
# The pid value 'ALL' selects sar -u; any other value uses the host's :cpu_pid_cmd
cpu_cmd = (pid == 'ALL' ? "sar -u #{@config[:hosts][host][:phases][phase][:cpu][:interval]} #{@config[:hosts][host][:phases][phase][:cpu][:amount]}" :
"#{p[:host][host][:cpu_pid_cmd]} #{pid} #{@config[:hosts][host][:phases][phase][:cpu][:interval]} #{@config[:hosts][host][:phases][phase][:cpu][:amount]}")
# nil is passed as cmd_path, so the cpu command is run without the cmd_path prefix
p[:host][host][:phase][phase][:pids][pid][:cpu] = run_remote_cmd(@config[:hosts][host][:user], host, cpu_cmd,
"#{@config[:base_log_name]}-#{host}-phase#{phase}-cpu_stats.log", nil
)
debug_msg("#{host}: Forked cpu cmd pid return: [#{p[:host][host][:phase][phase][:pids][pid][:cpu]}]")
# Store total cpu monitoring time
p[:host][host][:phase][phase][:pids][pid][:total_time][p[:host][host][:phase][phase][:pids][pid][:cpu]] = @config[:hosts][host][:phases][phase][:cpu][:interval].to_i * @config[:hosts][host][:phases][phase][:cpu][:amount].to_i
end
if(@config[:hosts][host][:phases][phase][:db])
# Forked db cmd
# db_conn-mon.plx receives interval, amount, :sys_user, :sys_pass and the
# space-joined :schemas list on its command line
p[:host][host][:phase][phase][:pids][pid][:db] = run_remote_cmd(@config[:hosts][host][:user], host,
"db_conn-mon.plx #{@config[:hosts][host][:phases][phase][:db][:interval]} #{@config[:hosts][host][:phases][phase][:db][:amount]} #{@config[:hosts][host][:sys_user]} #{@config[:hosts][host][:sys_pass]} #{@config[:hosts][host][:schemas].join(' ')}",
"#{@config[:base_log_name]}-#{host}-phase#{phase}-db_conn_mon.log"
)
debug_msg("#{host}: Forked db cmd pid return: [#{p[:host][host][:phase][phase][:pids][pid][:db]}]")
# Store total db monitoring time
p[:host][host][:phase][phase][:pids][pid][:total_time][p[:host][host][:phase][phase][:pids][pid][:db]] = @config[:hosts][host][:phases][phase][:db][:interval].to_i * @config[:hosts][host][:phases][phase][:db][:amount].to_i
end
# WAIT
# do_wait blocks on the monitor(s) with the largest total time and detaches the others
info_msg("Waiting for monitors for phase #{phase} on #{host} to stop...")
do_wait(p[:host][host][:phase][phase][:pids][pid][:total_time])
info_msg("...Monitors for phase #{phase} on #{host} stopped (#{Time.now.to_s})")
end #end pids fork
end #end pids
# Wait for commands in phase
# Runs in the phase child, which forked the pid children and therefore holds their :fpid values
p[:host][host][:phase][phase][:pids].each_key { |pid| Process.waitpid(p[:host][host][:phase][phase][:pids][pid][:fpid]) }
end #end phase fork
# Wait for phase to complete before launching next phase
Process.waitpid(p[:host][host][:phase][phase][:fpid])
end #end phase
end #end host fork
# Back in the main process: its copy of p holds only what was set before the host fork, plus the host's :fpid
debug_msg("#{i}> p out: #{p.inspect}")
i+=1
end #end host
# wait for each host to come back
@config[:hosts].each_key { |host| Process.waitpid(p[:host][host][:fpid]) }
end
# MAIN
# Open the log, record the parsed options, load the config and, unless
# execution was disabled on the command line, run the monitors.
@log = start_log(@options[:log])
debug_msg("OPTIONS: " + @options.inspect)
parse_config(@options[:config_file])
start_monitors if @options[:execute]
exit(0)