Skip to content

Commit

Permalink
Merge pull request #8 from omame/omame/zombie-allocs
Browse files Browse the repository at this point in the history
Add a counter for zombie allocs and skip them
  • Loading branch information
pcarranza authored Sep 9, 2018
2 parents 9f4b788 + 9e63d3f commit 21b1a6a
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
9 changes: 9 additions & 0 deletions exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
ch <- nodeUsedCPU

allocation.Describe(ch)
allocationZombies.Describe(ch)
evalCount.Describe(ch)
taskCount.Describe(ch)

Expand Down Expand Up @@ -477,6 +478,7 @@ func (e *Exporter) collectAllocations(nodes nodeMap, ch chan<- prometheus.Metric
}

var w sync.WaitGroup
allocationZombies.Set(0)

for _, allocStub := range allocStubs {
w.Add(1)
Expand All @@ -485,6 +487,12 @@ func (e *Exporter) collectAllocations(nodes nodeMap, ch chan<- prometheus.Metric
defer w.Done()

n := nodes[allocStub.NodeID]
if n == nil {
logrus.Debugf("Allocation %s doesn't have a node associated. Skipping",
allocStub.ID)
allocationZombies.Add(1)
return
}

if !nodes.IsReady(allocStub.NodeID) {
logrus.Debugf("Skipping fetching allocation %s for node %s because it's not in ready state but %s",
Expand Down Expand Up @@ -600,6 +608,7 @@ func (e *Exporter) collectAllocations(nodes nodeMap, ch chan<- prometheus.Metric

allocation.Collect(ch)
taskCount.Collect(ch)
allocationZombies.Collect(ch)
return nil
}

Expand Down
6 changes: 6 additions & 0 deletions nomad-exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ var (
"Allocation throttled CPU.",
[]string{"job", "group", "alloc", "region", "datacenter", "node"}, nil,
)
// allocationZombies is a gauge counting "zombie" allocations: allocation
// stubs whose NodeID has no entry in the node map passed to
// collectAllocations. That function sets the gauge to 0 before iterating over
// the allocation stubs, adds 1 for every such allocation it skips, and then
// emits the resulting value through Collect.
allocationZombies = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Name: "allocation_zombies",
Help: "Allocation zombies.",
},
)
taskCPUTotalTicks = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "task_cpu_total_ticks"),
"Task CPU total ticks.",
Expand Down

0 comments on commit 21b1a6a

Please sign in to comment.