Skip to content

Commit

Permalink
working on monitor feature
Browse files Browse the repository at this point in the history
  • Loading branch information
Matthias Lee committed Jun 12, 2011
1 parent 38f15b0 commit 099cc94
Show file tree
Hide file tree
Showing 12 changed files with 497 additions and 13 deletions.
260 changes: 260 additions & 0 deletions #manage-cluster.py#
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
#!/usr/bin/python

import os
import sys
import time
import CLnode
import GF


def curSpotCost(inst_size):
    """Return the average current spot price for ``inst_size``.

    Runs ``ec2-describe-spot-price-history`` through ``GF.run`` for
    Linux/UNIX instances in us-east-1, starting at the current time, and
    averages the second tab-separated field of every non-empty output line.

    Returns the average as a float, or -1 if the command output contains
    "timeout", yields no records, or cannot be parsed.
    """
    # The stamp is labelled "-0000", so it has to be built from UTC, not
    # local time.  strftime also zero-pads the fields; the previous
    # hand-built string did not, and appended the weekday (struct_time
    # index 6) as an extra ":"-separated field.
    curdate = time.strftime("%Y-%m-%dT%H:%M:%S-0000", time.gmtime())
    # Bound up front so the except handler can always print it, even when
    # GF.run itself is what raised.
    res = ""
    try:
        res = GF.run("ec2-describe-spot-price-history -d Linux/UNIX --region us-east-1 --instance-type " + inst_size + " -s " + curdate)
        if res.find("timeout") >= 0:
            print("TIMEOUT:  " + res)
            return -1
        # Ignore blank lines (e.g. after a trailing newline) so they neither
        # break the field lookup nor inflate the divisor.
        prices = [float(line.split("\t")[1])
                  for line in res.split("\n") if line.strip()]
        if not prices:
            print("No spot price records returned \n" + res)
            return -1
        cost = sum(prices) / len(prices)
    except Exception as x:
        print(str(x) + " \n" + str(res))
        return -1
    GF.log("Current Instance Cost: " + str(cost), 1)
    return cost

def startNodes(ami, inst_size, keyName, maxPrice, nodecnt):
    """Request ``nodecnt`` one-time spot instances of ``ami``.

    Builds an ``ec2-request-spot-instances`` command line and runs it
    through ``GF.run``, printing the command output on success.

    Parameters:
        ami       -- AMI id to launch
        inst_size -- instance type passed to --instance-type
        keyName   -- key pair name passed to --key
        maxPrice  -- maximum spot price passed to -p
        nodecnt   -- number of instances passed to -n

    Returns -1 when the output contains "timeout" or "InvalidAMIID", or
    when an exception is raised; otherwise returns None.
    """
    GF.log("... starting " + str(nodecnt) + " node(s)", 1)
    # Bound up front so the except handler can always print it.
    res = ""
    try:
        # Long options use the documented double-dash form; the instance
        # type flag was previously spelled with a single dash.
        cmd = ("ec2-request-spot-instances " + ami
               + " -p " + str(maxPrice)
               + " --instance-type " + inst_size
               + " -n " + str(nodecnt)
               + " --type one-time"
               + " --key " + keyName)
        res = GF.run(cmd)
        if res.find("timeout") >= 0:
            print("TIMEOUT:  " + res)
            return -1
        if res.find("InvalidAMIID") >= 0:
            print("INVALID AMI ID:  " + res)
            return -1
        print(res)
    except Exception as x:
        print(str(x) + " \n" + str(res))
        return -1

def launchCluster(ami, inst_size, keyName, maxPrice, nodes):
    """Launch ``nodes`` spot instances if the current price allows it.

    Looks up the current spot price for ``inst_size`` and exits the
    process with status -1 when the lookup failed, or -2 when the price is
    above ``maxPrice``.  Otherwise a single spot request for all ``nodes``
    instances is issued via startNodes().
    """
    GF.log("Maximum Price: " + str(maxPrice), 1)
    spotPrice = curSpotCost(inst_size)

    # Work out which abort condition applies (if any) before acting on it.
    failure = None
    if spotPrice == -1:
        failure = ("Error: Failed to get current spot price.", -1)
    elif spotPrice > maxPrice:
        failure = ("Error: Current spot price too high.", -2)

    if failure is not None:
        message, exitCode = failure
        print(message)
        sys.exit(exitCode)

    GF.log("Launching " + str(nodes) + " nodes.", 1)
    # One request covers every node; no per-node loop is needed.
    startNodes(ami, inst_size, keyName, maxPrice, nodes)

def getRunningInstances():
    """Read ``ec2-describe-instances`` output into CLnode objects.

    Every output line containing "INSTANCE" is split on tabs and turned
    into a ``CLnode.CLnode``; the resulting list is handed to
    ``GF.addNewNodes``.

    Returns -1 (without calling ``GF.addNewNodes``) when the output
    contains "timeout" or an exception is raised; otherwise returns None.
    """
    nodes = []
    # Bound up front so the except handler can always print it, even when
    # GF.run itself is what raised.
    res = ""
    try:
        res = GF.run("ec2-describe-instances")
        if res.find("timeout") >= 0:
            print("TIMEOUT:  " + res)
            return -1
        for line in res.split("\n"):
            if line.find("INSTANCE") >= 0:
                inst = line.split("\t")
                # Positional CLnode arguments: the second field is passed
                # twice (first two arguments), followed by fields
                # 5, 2, 6, 9, 10, 0 and 3 of the tab-separated record.
                nodes.append(CLnode.CLnode(inst[1], inst[1], inst[5], inst[2],
                                           inst[6], inst[9], inst[10],
                                           inst[0], inst[3]))
    except Exception as x:
        print(str(x) + " \n" + str(res))
        return -1
    GF.addNewNodes(nodes)

#ec2-describe-spot-instance-requests
def getSpotRequests():
    """Append the current spot instance requests to ``GF.reqests``.

    Runs ``ec2-describe-spot-instance-requests`` through ``GF.run`` and
    appends one ``CLnode.CLnode`` per output line containing "INSTANCE".
    The list is only appended to, never cleared, so repeated calls
    accumulate entries.

    Returns -1 when the output contains "timeout" or an exception is
    raised; otherwise returns None.
    """
    # Bound up front so the except handler can always print it, even when
    # GF.run itself is what raised.
    res = ""
    try:
        res = GF.run("ec2-describe-spot-instance-requests")
        if res.find("timeout") >= 0:
            print("TIMEOUT:  " + res)
            return -1
        for line in res.split("\n"):
            if line.find("INSTANCE") >= 0:
                inst = line.split("\t")
                # NOTE(review): the attribute really is spelled "reqests"
                # here; kept as-is because the GF module is not visible
                # from this file -- confirm against GF before renaming.
                GF.reqests.append(CLnode.CLnode(inst[1], inst[1], inst[5],
                                                '', '', '', inst[6], inst[0]))
    except Exception as x:
        print(str(x) + " \n" + str(res))
        return -1

def buildBundle(payload, payloadDir):
    """Rebuild the tar bundle ``payload`` from the files in ``payloadDir``.

    Removes any existing bundle, then runs ``tar cvf`` over
    ``payloadDir/*``.  Both commands go through ``GF.run`` and are logged
    at level 1.  If either raises, the error is printed and the process
    exits with status 1.
    """
    # -f keeps the removal quiet when no previous bundle exists yet.
    rmCmd = "rm -f " + payload
    tarCmd = "tar cvf " + payload + ' ' + payloadDir + "/*"
    # Bound up front so the except handler can always print it.
    res = ""
    try:
        # Log each command before running it so a failing command is
        # still visible in the log.
        GF.log(rmCmd, 1)
        res = GF.run(rmCmd)
        GF.log(tarCmd, 1)
        res = GF.run(tarCmd)
    except Exception as x:
        print(str(x) + " \n" + str(res))
        # Non-zero status: a failed bundle build is not a success.
        sys.exit(1)

def monitor(n, timeout):
    """Wait for spot requests to be filled and instances to leave "pending".

    Phase 1 polls the spot requests up to ``n`` times until none has
    status "open".  Phase 2 polls the instances up to ``n`` times until
    none has status "pending".  ``timeout`` is the number of seconds slept
    between polls.

    If a phase runs out of polls, the user is asked whether to keep
    waiting: on yes, monitoring restarts with the same arguments; on no,
    the process exits.
    """
    # ---- Phase 1: wait for every spot request to leave "open" ----
    allStarted = True
    for i in range(0, n):
        allStarted = True
        # getSpotRequests() only appends to the list, so drop the previous
        # poll's entries first; otherwise a stale "open" entry would keep
        # this loop from ever succeeding.
        # NOTE(review): uses the "reqests" spelling that getSpotRequests()
        # writes to -- confirm against the GF module.
        del GF.reqests[:]
        getSpotRequests()
        for req in GF.reqests:
            if req.status == "open":
                allStarted = False
        if allStarted is True:
            break
        time.sleep(timeout)

    if allStarted is False:
        print("All instances did not start during designated time.")
        if GF.confirmQuestion("Would you like to continue?") is True:
            # n is still the poll count here: the inner loops use their
            # own variable names instead of reusing n.
            monitor(n, timeout)
        else:
            sys.exit()
        return

    # ---- Phase 2: wait for every instance to leave "pending" ----
    # NOTE(review): assumes GF.addNewNodes() refreshes the status of nodes
    # already known to GF.nodes -- confirm against the GF module.
    for i in range(0, n):
        allStarted = True
        getRunningInstances()
        for node in GF.nodes:
            if node.status == "pending":
                allStarted = False
        if allStarted is True:
            break
        # Pause between polls, as in phase 1, instead of issuing all n
        # queries back to back.
        time.sleep(timeout)

    if allStarted is False:
        print("All instances did not start during designated time.")
        if GF.confirmQuestion("Would you like to continue?") is True:
            monitor(n, timeout)
        else:
            sys.exit()
    else:
        # TODO: launch/deploy step once every instance is up ("launch?!").
        pass



if __name__ == "__main__":
    # Command-line entry point.  Each argument is examined in order; most
    # actions finish by exiting the process.
    #
    # NOTE(review): the original indentation of this script was not
    # available.  In the three listing options the describe call is placed
    # at loop level (every entry is described, only matching ones are
    # counted) -- confirm that this is the intended nesting.

    # Hard-coded cluster configuration.
    ami = "ami-06ad526f"
    size = "t1.micro"
    key = "id_rsa"
    maxPrice = .01
    sshKey = '/home/madmaze/.ec2/pkey'
    rebuildBundle = True
    payload = './bundle.tar'
    payloadDir = './payload'

    print("Cluster manager v0.1")

    # argc is the index of the current argument in sys.argv, so
    # sys.argv[argc+1] is the value following the current option.
    argc = 0
    for arg in sys.argv[1:]:
        argc += 1

        if arg == "-debug":
            GF.logLevel = 2
        if arg == "-info":
            GF.logLevel = 1

        if arg == "-list" or arg == "-l":
            getRunningInstances()
            cnt = 0
            for node in GF.nodes:
                if node.running() is True:
                    cnt += 1
                node.desc()
            GF.log("There are a total of " + str(cnt) + " instances running.", 0)
            sys.exit()

        if arg == "-listblock" or arg == "-lb":
            getRunningInstances()
            cnt = 0
            for node in GF.nodes:
                if node.running() is True:
                    cnt += 1
                node.desc_detail()
            GF.log("There are a total of " + str(cnt) + " instances running.", 0)
            sys.exit()

        if arg == "-listspots" or arg == "-ls":
            getSpotRequests()
            runcnt = 0
            ocnt = 0
            for node in GF.reqests:
                if node.status == "active":
                    runcnt += 1
                if node.status == "open":
                    ocnt += 1
                node.desc()
            GF.log("There are a total of " + str(runcnt) + " active and " + str(ocnt) + " waiting to launch", 0)
            sys.exit()

        if arg == "-launch":
            # A missing count used to be ignored silently; report it.
            if len(sys.argv) < argc + 2:
                print("Please specify number after -launch")
                sys.exit()
            countArg = sys.argv[argc + 1]
            # Only the number parsing is guarded, so an error raised while
            # launching is no longer reported as a bad number.
            try:
                n = int(countArg)
            except ValueError as x:
                print("Please specify number after -launch")
                print(str(x) + " " + countArg)
                sys.exit()
            if n > 0:
                if GF.confirmQuestion("This will create " + str(n) + " instance(s). \nAre you sure you want to continue?") is False:
                    sys.exit()
                print("Launching " + str(n) + " instances")
                launchCluster(ami, size, key, maxPrice, n)
            else:
                print("Please specify positive number")
            sys.exit()

        if arg == "-shutdown" or arg == "-killall":
            # ask user for confirm
            if GF.confirmQuestion("!!This will TERMINATE all running instances!! \nAre you sure you want to continue?") is False:
                sys.exit()
            getRunningInstances()
            if len(GF.nodes) == 0:
                print("There are currently no nodes to kill")
            for n in GF.nodes:
                n.kill()

        if arg == "-kill":
            foundinst = False
            getRunningInstances()
            # Instance name comes from the next argument, or is prompted for.
            if len(sys.argv) >= argc + 2:
                var = sys.argv[argc + 1].strip()
            else:
                var = raw_input("Which host would you like to kill?: ").strip()

            for n in GF.nodes:
                if n.instName == var:
                    foundinst = True
            if foundinst is False:
                print("There is currently no running instance by the ID: " + var)
            else:
                # ask user for confirm
                if GF.confirmQuestion("!!This will kill the instance: " + var + "!\nAre you sure you want to continue?") is False:
                    sys.exit()
                for n in GF.nodes:
                    if n.instName == var:
                        n.kill()

        if arg == "-deploy":
            foundinst = False
            getRunningInstances()
            # Instance name comes from the next argument, or is prompted for.
            if len(sys.argv) >= argc + 2:
                var = sys.argv[argc + 1].strip()
            else:
                var = raw_input("Which host would you like to deploy?: ").strip()

            # Only an instance whose status is 'running' can be deployed to.
            for n in GF.nodes:
                if n.instName == var and n.status == 'running':
                    foundinst = True
            if foundinst is False:
                print("There is currently no running instance by the ID: " + var)
            else:
                # ask user for confirm
                if GF.confirmQuestion("!!This will deploy on the instance: " + var + "!\nAre you sure you want to continue?") is False:
                    sys.exit()
                if rebuildBundle is True:
                    print("Building Bundle...")
                    buildBundle(payload, payloadDir)
                for n in GF.nodes:
                    if n.instName == var:
                        n.deploy(payload, sshKey, True)

        if arg == "-monitor":
            monitor(10, 10)

#ec2-describe-images -a | grep ami-06ad526f
#getRunningInstances()
#launchCluster("ami-06ad526f", "t1.micro", "id_rsa", .01, 10)
1 change: 1 addition & 0 deletions .CLnode.py.marks
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!~;1872;1872
12 changes: 8 additions & 4 deletions CLnode.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,25 +64,29 @@ def running(self):

def copy(self):
return CLnode(self.instID,self.instName,self.status,self.ami,self.key,self.size,self.date,self.ntype,self.url)
def deploy(self,payload,launch=False):

def deploy(self,payload,sshKey,launch=False):
# COPY payload
try:
res=GF.run("scp -i ~/.ec2/pkey /home/madmaze/.ec2/pkey [email protected]:~/.ssh/")
res=GF.run("scp -i "+sshKey+" "+payload+" ubuntu@"+self.url+":~/")
print "scp -i "+sshKey+" "+payload+" ubuntu@"+self.url+":~/"
except Exception as x:
print x, "\n", res
return -1

# EXTRACT payload
try:
res=GF.run("scp -i ~/.ec2/pkey /home/madmaze/.ec2/pkey [email protected]:~/.ssh/")
res=GF.run("ssh -i "+sshKey+" ubuntu@"+self.url+" 'tar xvf ~/bundle.tar;'")
print "ssh -i "+sshKey+" ubuntu@"+self.url+" 'tar xvf ~/bundle.tar;'"
except Exception as x:
print x, "\n", res
return -1

if launch is True:
# LAUNCH Payload
try:
res=GF.run("scp -i ~/.ec2/pkey /home/madmaze/.ec2/pkey [email protected]:~/.ssh/")
res=GF.run("ssh -i "+sshKey+" ubuntu@"+self.url+" 'python ~/payload/setup.py'")
print "ssh -i "+sshKey+" ubuntu@"+self.url+" 'python ~/payload/setup.py'"
except Exception as x:
print x, "\n", res
return -1
Expand Down
Binary file modified CLnode.pyc
Binary file not shown.
51 changes: 48 additions & 3 deletions CLnode.py~
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,19 @@ class CLnode:
key=''
size=''
date=''
ntype=''
url=''

def __init__(self, instID='',instName='',status='',ami='',key='',size='',date=''):
def __init__(self, instID='',instName='',status='',ami='',key='',size='',date='',ntype='',url=''):
self.instID=instID
self.instName=instName
self.status=status
self.ami=ami
self.key=key
self.size=size
self.date=date
self.ntype=ntype
self.url=url

def kill(self):
if self.status!="running":
Expand All @@ -33,13 +37,54 @@ class CLnode:
return -1

def desc(self):
print self.instID,self.instName,self.status,self.ami,self.key,self.size,self.date
print self.ntype,self.instID,self.instName,self.status,self.url,self.ami,self.key,self.size,self.date

def desc_detail(self):
    """Print each field of this node on its own labelled line.

    The block ends with a "====" separator line.
    """
    # (label, value) pairs in output order; the tabs in each label are part
    # of the printed text.
    rows = (
        ("Instance Type:\t\t", self.ntype),
        ("Instance ID:\t\t", self.instID),
        ("Instance Name:\t\t", self.instName),
        ("Status:\t\t\t", self.status),
        ("Hostname/url:\t", self.url),
        ("AMI:\t\t\t", self.ami),
        ("Keypair:\t\t", self.key),
        ("Instance Size:\t\t", self.size),
        ("Date/Time started:\t", self.date),
    )
    for label, value in rows:
        print(label + value)
    print("====")



def status(self):
    # Returns the node's status attribute.
    # NOTE(review): __init__ assigns self.status, so on an instance the
    # attribute of the same name shadows this method -- node.status yields
    # the stored value and this accessor is not reachable via the instance.
    return self.status

def running(self):
    """Return True when this node's status is "running", else False."""
    isUp = (self.status == "running")
    return bool(isUp)

def copy(self):
return CLnode(self.instID,self.instName,self.status,self.ami,self.key,self.size,self.date)
return CLnode(self.instID,self.instName,self.status,self.ami,self.key,self.size,self.date,self.ntype,self.url)
def deploy(self,payload,launch=False):
# COPY payload
try:
res=GF.run("scp -i ~/.ec2/pkey /home/madmaze/.ec2/pkey [email protected]:~/.ssh/")
except Exception as x:
print x, "\n", res
return -1

# EXTRACT payload
try:
res=GF.run("scp -i ~/.ec2/pkey /home/madmaze/.ec2/pkey [email protected]:~/.ssh/")
except Exception as x:
print x, "\n", res
return -1

if launch is True:
# LAUNCH Payload
try:
res=GF.run("scp -i ~/.ec2/pkey /home/madmaze/.ec2/pkey [email protected]:~/.ssh/")
except Exception as x:
print x, "\n", res
return -1


Binary file added bundle.tar
Binary file not shown.
Loading

0 comments on commit 099cc94

Please sign in to comment.