From 42594f9a18f56fb67c8c38b32ff846e9226e886d Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Mon, 5 Feb 2018 13:00:41 +0100 Subject: [PATCH 01/64] testcommit, fork stuff --- Readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Readme.md b/Readme.md index 81ce59d..2d34fba 100644 --- a/Readme.md +++ b/Readme.md @@ -1,4 +1,4 @@ -# Harmonic IO Streaming Framework +# Harmonic IO Streaming Framework - Framework for distributed task execution, key components: @@ -58,7 +58,7 @@ $ python3 example_stream_connector.py * Print the logs of the container to check the output of the executed task (`message was bytes ...`): ``` -$ docker logs happy_jepsen +$ docker logs happy_jepsen Listening for tasks... attempting to open local port: 0.0.0.0:80 Streaming from 172.17.0.1 : 40742 From 371a6a827493a57da8affa1fb0e693c4e34b0bc0 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Mon, 5 Feb 2018 13:08:30 +0100 Subject: [PATCH 02/64] Revert "testcommit, fork stuff" This reverts commit 42594f9a18f56fb67c8c38b32ff846e9226e886d. --- Readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Readme.md b/Readme.md index 2d34fba..81ce59d 100644 --- a/Readme.md +++ b/Readme.md @@ -1,4 +1,4 @@ -# Harmonic IO Streaming Framework - +# Harmonic IO Streaming Framework Framework for distributed task execution, key components: @@ -58,7 +58,7 @@ $ python3 example_stream_connector.py * Print the logs of the container to check the output of the executed task (`message was bytes ...`): ``` -$ docker logs happy_jepsen +$ docker logs happy_jepsen Listening for tasks... attempting to open local port: 0.0.0.0:80 Streaming from 172.17.0.1 : 40742 From d8db906989d23a3933219835871243b5a02fe0e9 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Mon, 5 Feb 2018 17:01:14 +0100 Subject: [PATCH 03/64] added comment with stuff todo --- harmonicIO/worker/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index d4e4854..d8e4b24 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -54,7 +54,7 @@ def on_post(self, req, res): if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_create(): # Unpack the posted data raw = str(req.stream.read(), 'UTF-8') - data = eval(raw) + data = eval(raw) ## should change eval to something else maybe? if req.content_length:doc = json.load(req.stream) if not data[Definition.Container.get_str_con_image_name()]: res.body = "Required parameters are not supplied!" From 0f0d19d83a8746281ba9615fdf44401d678a16c2 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Wed, 7 Feb 2018 17:07:58 +0100 Subject: [PATCH 04/64] added ClientManager to falcon api, added post request to queue a job --- harmonicIO/master/rest_service.py | 88 +++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 373b091..5b1b520 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -5,6 +5,8 @@ from harmonicIO.general.services import SysOut, Services as LService from .meta_table import LookUpTable +from urllib.request import urlopen +from urllib3.request import urlencode class RequestStatus(object): @@ -237,6 +239,89 @@ def on_get(self, req, res): res.content_type = "String" res.status = falcon.HTTP_200 +class ClientManager(object): + def __init__(self): + pass + + def on_get(self, req, res): + # check token and request type is provided + if not Definition.get_str_token() in req.params: + res.body = "Token is required." + res.content_type = "String" + res.status = falcon.HTTP_401 + return + + if not "type" in req.params: + res.body = "No command specified." + res.content_type = "String" + res.status = falcon.HTTP_406 + return + + # get status of job + #if req.params['type'] == 'jobStatus': + + + + return + + def on_post(self, req, res): + # check token and request type is provided + if not Definition.get_str_token() in req.params: + res.body = "Token is required." + res.content_type = "String" + res.status = falcon.HTTP_401 + return + + if not "type" in req.params: + res.body = "No command specified." + res.content_type = "String" + res.status = falcon.HTTP_406 + return + + # request to create new job - create ID for job, look for available container (prio 1 - worker with container available, prio 2 - worker with lowest cpu load), + if req.params['type'] == 'newJob': + + # create job ID + print("Requested new job!") + job_data = json.loads(req.stream) + print("Data provided: \n" + str(data)) + jobID = str(randrange(100,999)) + job_status = "INIT" + + # prepare response + res.body = "Request received, allocating resources for job - Job ID: {}".format(jobID) + res.content_type = "String" + res.status = falcon.HTTP_200 + print(job_status) + ## THIS PART SHOULD BE ASYNCHRONOUS + + # get server data + data = LookUpTable.verbose() + data['MSG'] = MessagesQueue.verbose() + candidates = [] + target_container = jobdata[Definition.Container.get_str_con_image_name()] + + # find suitable worker by prio 1 + if target_container in data["CONTAINERS"]: + print("Looking for container called " + target_container) + for container in data["CONTAINERS"][target_container]: + print("Found one at addr " + container["batch_addr"]) + candidates.append((container["batch_addr"], container["batch_port"], data["WORKERS"][container["batch_addr"]]["load5"])) # create tuple with IP, port and load on worker with container + + candidates.sort(key=lambda index: index[2]) # sort candidate workers on load (avg. load last 5 minutes) + print(candidates[0] + " has least load, sending request here!") + + # send request to worker + worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) + with urlopen(worker_url, req.params) as response: + html = response.read() + + worker_response = html.decode('UTF-8') + print(worker_response) + job_status = "ACTIVE" + print(job_status) + return + class RESTService(object): def __init__(self): # Initialize REST Services @@ -252,6 +337,9 @@ def __init__(self): # Add route for msg query api.add_route('/' + Definition.REST.get_str_msg_query(), MessagesQuery()) + # Add route for client manager + api.add_route('/' + 'clientManagement', ClientManager()) + # Establishing a REST server self.__server = make_server(Setting.get_node_addr(), Setting.get_node_port(), api) From 06b0cac49c44db6cffcb12f889a86a6fc2e6fbea Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Wed, 7 Feb 2018 17:27:29 +0100 Subject: [PATCH 05/64] bugfix in POST --- harmonicIO/master/rest_service.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 5b1b520..d24747b 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -283,8 +283,9 @@ def on_post(self, req, res): # create job ID print("Requested new job!") - job_data = json.loads(req.stream) - print("Data provided: \n" + str(data)) + job_data = req.stream#json.loads(req.stream) + + print("Data provided: \n" + str(job_data)) jobID = str(randrange(100,999)) job_status = "INIT" From 5f6245f9d7af58c1240dd86f199bad33a00c3707 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Wed, 7 Feb 2018 17:30:26 +0100 Subject: [PATCH 06/64] bugifx, added randrange --- harmonicIO/master/rest_service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index d24747b..33f9768 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -7,6 +7,7 @@ from urllib.request import urlopen from urllib3.request import urlencode +from random import randrange class RequestStatus(object): From 3372b845e432463b00c980c172b1a2f1562890b3 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Wed, 7 Feb 2018 17:34:27 +0100 Subject: [PATCH 07/64] bugifx, typo and testing req.stream read --- harmonicIO/master/rest_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 33f9768..1eefe42 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -286,7 +286,7 @@ def on_post(self, req, res): print("Requested new job!") job_data = req.stream#json.loads(req.stream) - print("Data provided: \n" + str(job_data)) + print("Data provided: \n" + str(job_data, 'UTF-8')) jobID = str(randrange(100,999)) job_status = "INIT" @@ -301,7 +301,7 @@ def on_post(self, req, res): data = LookUpTable.verbose() data['MSG'] = MessagesQueue.verbose() candidates = [] - target_container = jobdata[Definition.Container.get_str_con_image_name()] + target_container = job_data[Definition.Container.get_str_con_image_name()] # find suitable worker by prio 1 if target_container in data["CONTAINERS"]: From e8663f9d5c8420b495abe8837a6e49680ea322a9 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Wed, 7 Feb 2018 17:35:53 +0100 Subject: [PATCH 08/64] bugifx --- harmonicIO/master/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 1eefe42..ab271ab 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -286,7 +286,7 @@ def on_post(self, req, res): print("Requested new job!") job_data = req.stream#json.loads(req.stream) - print("Data provided: \n" + str(job_data, 'UTF-8')) + print("Data provided: \n" + str(job_data)) jobID = str(randrange(100,999)) job_status = "INIT" From 47a09bb1846f7ff6891952136f68a96bd921ab94 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Wed, 7 Feb 2018 17:41:03 +0100 Subject: [PATCH 09/64] bugifx --- harmonicIO/master/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index ab271ab..7527f4d 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -284,7 +284,7 @@ def on_post(self, req, res): # create job ID print("Requested new job!") - job_data = req.stream#json.loads(req.stream) + job_data = req.stream.read(req.content_length or 0)#json.loads(req.stream) print("Data provided: \n" + str(job_data)) jobID = str(randrange(100,999)) From 69dacb77c0fe72f8c7248cfb2e31e481e16656ac Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Wed, 7 Feb 2018 17:47:52 +0100 Subject: [PATCH 10/64] bugifx, added json loads --- harmonicIO/master/rest_service.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 7527f4d..e96b004 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -8,6 +8,7 @@ from urllib.request import urlopen from urllib3.request import urlencode from random import randrange +import json class RequestStatus(object): @@ -284,7 +285,7 @@ def on_post(self, req, res): # create job ID print("Requested new job!") - job_data = req.stream.read(req.content_length or 0)#json.loads(req.stream) + job_data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8'))#json.loads(req.stream) print("Data provided: \n" + str(job_data)) jobID = str(randrange(100,999)) @@ -315,7 +316,7 @@ def on_post(self, req, res): # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) - with urlopen(worker_url, req.params) as response: + with urlopen(worker_url, job_data) as response: html = response.read() worker_response = html.decode('UTF-8') From 42f7a7f148f493ffa269e80525a5b75c05dbde20 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 12:02:33 +0100 Subject: [PATCH 11/64] bugfix, str repr of tuple --- harmonicIO/master/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index e96b004..a46b60f 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -312,7 +312,7 @@ def on_post(self, req, res): candidates.append((container["batch_addr"], container["batch_port"], data["WORKERS"][container["batch_addr"]]["load5"])) # create tuple with IP, port and load on worker with container candidates.sort(key=lambda index: index[2]) # sort candidate workers on load (avg. load last 5 minutes) - print(candidates[0] + " has least load, sending request here!") + print(str(candidates[0]) + " has least load, sending request here!") # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) From 51225a41ae1e5235b324edb7a527bb47c5d816b1 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 12:08:40 +0100 Subject: [PATCH 12/64] bugfix, fix urlencode --- harmonicIO/master/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index a46b60f..26f96a8 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -316,7 +316,7 @@ def on_post(self, req, res): # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) - with urlopen(worker_url, job_data) as response: + with urlopen(worker_url, urlencode(job_data)) as response: html = response.read() worker_response = html.decode('UTF-8') From 9c6339130e7cfa27d80e28c0a434ed0cf2baebdd Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 13:26:51 +0100 Subject: [PATCH 13/64] bugfix --- harmonicIO/master/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 26f96a8..3eb532d 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -287,7 +287,7 @@ def on_post(self, req, res): print("Requested new job!") job_data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8'))#json.loads(req.stream) - print("Data provided: \n" + str(job_data)) + print("Data provided: \n", (job_data)) jobID = str(randrange(100,999)) job_status = "INIT" From 65c7795a0dedcf82d6ebc2d7a346db9e7ab04050 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 13:45:14 +0100 Subject: [PATCH 14/64] urlencode fixed? --- harmonicIO/master/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 3eb532d..47de98a 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -316,7 +316,7 @@ def on_post(self, req, res): # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) - with urlopen(worker_url, urlencode(job_data)) as response: + with urlopen(worker_url, bytes(urlencode(job_data), 'utf-8')) as response: html = response.read() worker_response = html.decode('UTF-8') From 3a3ec830cfb470292d4d102634fae46b8bfe647c Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 13:54:18 +0100 Subject: [PATCH 15/64] fixed parameter read in docker create call --- harmonicIO/worker/rest_service.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index d8e4b24..749ebeb 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -53,8 +53,7 @@ def on_post(self, req, res): """ if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_create(): # Unpack the posted data - raw = str(req.stream.read(), 'UTF-8') - data = eval(raw) ## should change eval to something else maybe? if req.content_length:doc = json.load(req.stream) + data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist if not data[Definition.Container.get_str_con_image_name()]: res.body = "Required parameters are not supplied!" From d2047178f1e9fbda3e249927cbefe091edc64eff Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 13:57:05 +0100 Subject: [PATCH 16/64] fixed missing import --- harmonicIO/worker/rest_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 749ebeb..211f0ea 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -3,7 +3,7 @@ from harmonicIO.general.services import SysOut, Services from .docker_service import DockerService from harmonicIO.general.definition import Definition, CRole - +from json import loads class ContainerService(object): def __init__(self): @@ -53,7 +53,7 @@ def on_post(self, req, res): """ if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_create(): # Unpack the posted data - data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist + data = loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist if not data[Definition.Container.get_str_con_image_name()]: res.body = "Required parameters are not supplied!" From f31fc739d976904a351e338d97af9b9320854c6f Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 14:07:29 +0100 Subject: [PATCH 17/64] fixed missing import --- harmonicIO/master/rest_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 47de98a..4f06646 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -285,7 +285,7 @@ def on_post(self, req, res): # create job ID print("Requested new job!") - job_data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8'))#json.loads(req.stream) + job_data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist print("Data provided: \n", (job_data)) jobID = str(randrange(100,999)) @@ -316,7 +316,7 @@ def on_post(self, req, res): # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) - with urlopen(worker_url, bytes(urlencode(job_data), 'utf-8')) as response: + with urlopen(worker_url, bytes(str(job_data), 'utf-8')) as response: html = response.read() worker_response = html.decode('UTF-8') From 2b3de1dbdd936aeed804761ecfe235498cd04107 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 14:15:29 +0100 Subject: [PATCH 18/64] debugging --- harmonicIO/master/rest_service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 4f06646..45061a0 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -316,6 +316,7 @@ def on_post(self, req, res): # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) + print(worker_url, '\n', job_data, '\n', bytes(str(job_data), 'utf-8')) with urlopen(worker_url, bytes(str(job_data), 'utf-8')) as response: html = response.read() From e783d018d946541704b24d080baf9d700c5d3436 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 14:36:18 +0100 Subject: [PATCH 19/64] changed encoding to use json dumps --- harmonicIO/master/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 45061a0..0fd3594 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -316,7 +316,7 @@ def on_post(self, req, res): # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) - print(worker_url, '\n', job_data, '\n', bytes(str(job_data), 'utf-8')) + print(worker_url, '\n', job_data, '\n', bytes(json.dumps(job_data), 'utf-8')) with urlopen(worker_url, bytes(str(job_data), 'utf-8')) as response: html = response.read() From 48d6eecd3c264ceee687b3b32175e13ba31f4301 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 14:50:44 +0100 Subject: [PATCH 20/64] debugging --- harmonicIO/worker/rest_service.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 211f0ea..fc9d317 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -53,7 +53,9 @@ def on_post(self, req, res): """ if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_create(): # Unpack the posted data - data = loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist + raw = req.stream.read(req.content_legnth or 0) + print(raw, str(raw, 'utf-8')) + data = json.loads(str(raw, 'utf-8')) # create dict of parameters if they exist if not data[Definition.Container.get_str_con_image_name()]: res.body = "Required parameters are not supplied!" From b2d87a9a45ce57bfc9a1386423c80a582d4b437c Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 14:58:02 +0100 Subject: [PATCH 21/64] json dumps fixing? --- harmonicIO/master/rest_service.py | 2 +- harmonicIO/worker/rest_service.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 0fd3594..6cc088b 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -317,7 +317,7 @@ def on_post(self, req, res): # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) print(worker_url, '\n', job_data, '\n', bytes(json.dumps(job_data), 'utf-8')) - with urlopen(worker_url, bytes(str(job_data), 'utf-8')) as response: + with urlopen(worker_url, bytes(json.dumps(job_data), 'utf-8')) as response: html = response.read() worker_response = html.decode('UTF-8') diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index fc9d317..9501b0b 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -53,7 +53,7 @@ def on_post(self, req, res): """ if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_create(): # Unpack the posted data - raw = req.stream.read(req.content_legnth or 0) + raw = req.stream.read(req.content_length or 0) print(raw, str(raw, 'utf-8')) data = json.loads(str(raw, 'utf-8')) # create dict of parameters if they exist From be6521006d00d23c096b0c01ecaec4871d6b9512 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Thu, 8 Feb 2018 14:59:55 +0100 Subject: [PATCH 22/64] json typos --- harmonicIO/worker/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 9501b0b..699fb19 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -3,7 +3,7 @@ from harmonicIO.general.services import SysOut, Services from .docker_service import DockerService from harmonicIO.general.definition import Definition, CRole -from json import loads +import json class ContainerService(object): def __init__(self): From aa7f78e33087feeee9b78606083d0da5a94ea98a Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Fri, 9 Feb 2018 15:26:55 +0100 Subject: [PATCH 23/64] added Jobs field to metadata --- harmonicIO/master/meta_table.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 9b685b2..c4cf1e5 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -25,6 +25,7 @@ def add_worker(dict_input): @staticmethod def del_worker(worker_addr): + # TODO: implement actual worker termination? del LookUpTable.Workers.__workers[worker_addr] class Containers(object): @@ -94,6 +95,19 @@ def add_tuple_info(tuple_info): def verbose(): return LookUpTable.Tuples.__tuples + class Jobs(object): + __jobs = {} + + @staticmethod + def new_job(request): + new_item = {} + new_item['job_id'] = request.get('job_id') + new_item['job_status'] = request.get('job_status') + new_item['container'] = request + new_item[''] = request[''] + new_item[''] = request[''] + new_item[''] = request[''] + @staticmethod def update_worker(dict_input): LookUpTable.Workers.add_worker(dict_input) @@ -110,4 +124,3 @@ def verbose(): ret['TUPLES'] = LookUpTable.Tuples.verbose() return ret - From a834ec4d291e5decb55f3e58f3b6c3087bee730c Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Mon, 12 Feb 2018 18:22:38 +0100 Subject: [PATCH 24/64] added job to metadata --- harmonicIO/master/meta_table.py | 59 +++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index c4cf1e5..ed30104 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -1,5 +1,5 @@ import queue -from harmonicIO.general.services import Services +from harmonicIO.general.services import Services, SysOut from harmonicIO.general.definition import Definition, CTuple @@ -98,15 +98,53 @@ def verbose(): class Jobs(object): __jobs = {} + # create new job from request dictionary @staticmethod def new_job(request): new_item = {} - new_item['job_id'] = request.get('job_id') + new_id = request.get('job_id') + if not new_id: + SysOut.warn_string("Couldn't create job, no ID provided!") + return None + + if new_id in LookUpTable.Jobs.__jobs: + SysOut.warn_string("Job already exists in system, can't create!") + return None + + new_item['job_id'] = new_id new_item['job_status'] = request.get('job_status') - new_item['container'] = request - new_item[''] = request[''] - new_item[''] = request[''] - new_item[''] = request[''] + new_item[Definition.Container.get_str_con_image_name()] = request.get(Definition.Container.get_str_con_image_name()) + new_item['user_token'] = request.get(Definition.get_str_token()) + new_item['time_to_live'] = request.get('ttl') + + LookUpTable.Jobs.__jobs[new_id] = new_item + + @staticmethod + def update_job(request): + job_id = request.get('job_id') + if not job_id in LookUpTable.Jobs.__jobs: + SysOut.warn_string("Couldn't update job, no existing job matching ID!") + return None + + tkn = request.get(Definitions.get_str_token()) + if not tkn == LookUpTable.Jobs.__jobs[job_id]['user_token']: + SysOut.warn_string("Incorrect token, refusing update.") + return None + + old_job = LookUpTable.Jobs.__jobs[] + old_job['job_status'] = request.get('job_status') + old_job[Definition.Container.get_str_con_image_name()] = request.get(Definition.Container.get_str_con_image_name()) +# old_job['user_token'] = request.get(Definition.get_str_token()) # should not be able to change user who requested job? + old_job['time_to_live'] = request.get('ttl') + old_job['start_time'] = request.get('time') + host = {} + host[Definition.get_str_node_port()] = request.get(Definition.get_str_node_port()) + host[Definition.get_str_node_addr()] = request.get(Definition.get_str_node_addr()) + old_job['host_container'] = host + + @staticmethod + def verbose(): + return LookUpTable.Jobs.__jobs @staticmethod def update_worker(dict_input): @@ -116,11 +154,20 @@ def update_worker(dict_input): def get_candidate_container(image_name): return LookUpTable.Containers.get_candidate_container(image_name) + @staticmethod + def new_job(request): + LookUpTable.Jobs.new_job(request) + + @staticmethod + def update_job(request): + LookUpTable.Jobs.update_job(request) + @staticmethod def verbose(): ret = dict() ret['WORKERS'] = LookUpTable.Workers.verbose() ret['CONTAINERS'] = LookUpTable.Containers.verbose() ret['TUPLES'] = LookUpTable.Tuples.verbose() + ret['JOBS'] = LookUpTable.Jobs.verbose() return ret From e81ace69152b68cbbdf6824dcc7f1e80fee51e92 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Mon, 12 Feb 2018 18:23:13 +0100 Subject: [PATCH 25/64] working on creating job upon user request, search available local images --- harmonicIO/general/definition.py | 4 ++ harmonicIO/master/rest_service.py | 100 +++++++++++++++++++--------- harmonicIO/worker/__main__.py | 1 + harmonicIO/worker/docker_master.py | 3 + harmonicIO/worker/docker_service.py | 4 ++ harmonicIO/worker/rest_service.py | 3 + 6 files changed, 83 insertions(+), 32 deletions(-) diff --git a/harmonicIO/general/definition.py b/harmonicIO/general/definition.py index 20681ab..f999850 100644 --- a/harmonicIO/general/definition.py +++ b/harmonicIO/general/definition.py @@ -188,6 +188,10 @@ def get_str_token(): def get_str_docker(): return "docker" + @staticmethod + def get_str_local_imgs(): + return "local_images" + class Batch(object): @staticmethod def get_str_batch_addr(): diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 6cc088b..0ecfd62 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -7,8 +7,9 @@ from urllib.request import urlopen from urllib3.request import urlencode -from random import randrange +from random.SystemRandom import choice import json +import string class RequestStatus(object): @@ -285,45 +286,27 @@ def on_post(self, req, res): # create job ID print("Requested new job!") - job_data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist + job_req = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist + + # below ID randomizer from: https://stackoverflow.com/questions/2257441/random-string-generation-with-upper-case-letters-and-digits-in-python + job_id = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(5)) + + # add job to table + job_req['job_id'] = job_id + job_req['job_status'] = "INIT" + job_req['ttl'] = 30 + job_req['start_time'] = LService.get_current_timestamp() + LookUpTable.Jobs.new_job - print("Data provided: \n", (job_data)) - jobID = str(randrange(100,999)) - job_status = "INIT" # prepare response - res.body = "Request received, allocating resources for job - Job ID: {}".format(jobID) + res.body = "Request received, allocating resources for job - Job ID: {}".format(job_id) res.content_type = "String" res.status = falcon.HTTP_200 print(job_status) + ## THIS PART SHOULD BE ASYNCHRONOUS - # get server data - data = LookUpTable.verbose() - data['MSG'] = MessagesQueue.verbose() - candidates = [] - target_container = job_data[Definition.Container.get_str_con_image_name()] - - # find suitable worker by prio 1 - if target_container in data["CONTAINERS"]: - print("Looking for container called " + target_container) - for container in data["CONTAINERS"][target_container]: - print("Found one at addr " + container["batch_addr"]) - candidates.append((container["batch_addr"], container["batch_port"], data["WORKERS"][container["batch_addr"]]["load5"])) # create tuple with IP, port and load on worker with container - - candidates.sort(key=lambda index: index[2]) # sort candidate workers on load (avg. load last 5 minutes) - print(str(candidates[0]) + " has least load, sending request here!") - - # send request to worker - worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) - print(worker_url, '\n', job_data, '\n', bytes(json.dumps(job_data), 'utf-8')) - with urlopen(worker_url, bytes(json.dumps(job_data), 'utf-8')) as response: - html = response.read() - - worker_response = html.decode('UTF-8') - print(worker_response) - job_status = "ACTIVE" - print(job_status) return class RESTService(object): @@ -352,6 +335,59 @@ def run(self): self.__server.serve_forever() +def find_available_worker(job_req): + # get server data + data = LookUpTable.verbose() + data['MSG'] = MessagesQueue.verbose() + candidates = [] + target_container = job_req[Definition.Container.get_str_con_image_name()] + + for worker in data["WORKERS"]: + if worker[Definition.REST.get_str_local_imgs()]: + + for image in worker[Definition.REST.get_str_local_imgs()]: + if target_container in image.tags: + candidate = (worker["node_addr"], worker["load5"]) # create tuple with IP and load on worker with container + + # find suitable worker by prio 1 + if target_container in data["CONTAINERS"]: + print("Looking for container called " + target_container) + for container in data["CONTAINERS"][target_container]: + candidate = ((container["batch_addr"], data["WORKERS"][container["batch_addr"]]["load5"])) # create tuple with IP and load on worker with container + if candidate[1] < 0.5: # only add candidate if worker load less than 50% + candidates.append(candidate) + + + # find suitable worker by prio 2 + elif data["WORKERS"]: + for worker in data["WORKERS"]: + + if candidate[1] < 0.5: + candidates.append(candidate) + + # no suitable worker available + else: + return None + + candidates.sort(key=lambda index: index[1]) # sort candidate workers on load (avg. load last 5 minutes) + print('Candidates:\n' + candidates) + print(str(candidates[0]) + " has least load, sending request here!") + + return candidates + +def start_job(target_worker) + + # send request to worker + worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) + print(worker_url, '\n', job_req, '\n', bytes(json.dumps(job_req), 'utf-8')) + with urlopen(worker_url, bytes(json.dumps(job_req), 'utf-8')) as response: + html = response.read() + + worker_response = html.decode('UTF-8') + print(worker_response) + job_status = "ACTIVE" + print(job_status) + def get_html_form(worker, msg, containers, tuples): html = """ diff --git a/harmonicIO/worker/__main__.py b/harmonicIO/worker/__main__.py index d5df5fe..36a10c2 100644 --- a/harmonicIO/worker/__main__.py +++ b/harmonicIO/worker/__main__.py @@ -29,6 +29,7 @@ def update_worker_status(): s_content = Services.get_machine_status(Setting, CRole.WORKER) s_content[Definition.REST.get_str_docker()] = DockerService.get_containers_status() + s_content[Definition.REST.get_str_local_imgs()] = DockerService.get_local_images() html = urllib3.PoolManager() try: diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 6ce4346..6ad8860 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -70,6 +70,9 @@ def get_container_status(input): return res + def get_local_images(self): + self.__client.images.list() + def run_container(self, container_name): def get_ports_setting(expose, ports): diff --git a/harmonicIO/worker/docker_service.py b/harmonicIO/worker/docker_service.py index 557f81e..56b00d1 100644 --- a/harmonicIO/worker/docker_service.py +++ b/harmonicIO/worker/docker_service.py @@ -15,3 +15,7 @@ def create_container(container_name): @staticmethod def get_containers_status(): return DockerService.__docker_master.get_containers_status() + + @staticmethod + def get_local_images(): + return DockerService.__docker_master.get_local_images() diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 699fb19..68e0d45 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -32,6 +32,8 @@ def on_get(self, req, res): res.content_type = "String" res.status = falcon.HTTP_200 + ## TODO: add list containers + def on_post(self, req, res): """ POST: docker?token=None&command={command} @@ -93,6 +95,7 @@ def on_get(self, req, res): if req.params[Definition.get_str_token()] == Setting.get_token(): s_content = Services.get_machine_status(Setting, CRole.WORKER) s_content[Definition.REST.get_str_docker()] = DockerService.get_containers_status() + s_content[Definition.REST.get_str_local_imgs()] = DockerService.get_local_images() res.body = str(s_content) From c188a65a55863d0dd48476f375398af2081fc678 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 13 Feb 2018 16:22:14 +0100 Subject: [PATCH 26/64] created class for managing job queue --- harmonicIO/master/jobqueue.py | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 harmonicIO/master/jobqueue.py diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py new file mode 100644 index 0000000..ead0367 --- /dev/null +++ b/harmonicIO/master/jobqueue.py @@ -0,0 +1,6 @@ +import threading, queue + +class JobQueue(object): + + def queue_job(job_data): + print("Not yet implemented") From 0604724d3ece9ff0009bb56e18c36fad7026367c Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 13 Feb 2018 16:23:04 +0100 Subject: [PATCH 27/64] refactoring, added job status definiton class --- harmonicIO/general/definition.py | 7 +++ harmonicIO/master/__main__.py | 7 +-- harmonicIO/master/meta_table.py | 4 +- harmonicIO/master/rest_service.py | 76 +++++++++++++++++-------------- 4 files changed, 57 insertions(+), 37 deletions(-) diff --git a/harmonicIO/general/definition.py b/harmonicIO/general/definition.py index f999850..92c1e18 100644 --- a/harmonicIO/general/definition.py +++ b/harmonicIO/general/definition.py @@ -23,6 +23,13 @@ class CTuple: RT = 3 +class JobStatus(object): + INIT = 0 + READY = 1 + ACTIVE = 2 + IDLE = 3 + + class Definition(object): @staticmethod def get_str_node_name(): diff --git a/harmonicIO/master/__main__.py b/harmonicIO/master/__main__.py index 71f2f2b..d8e90ee 100644 --- a/harmonicIO/master/__main__.py +++ b/harmonicIO/master/__main__.py @@ -1,5 +1,5 @@ from harmonicIO.general.services import SysOut - +import threading """ Master entry point @@ -21,7 +21,6 @@ def run_msg_service(): """ from .configuration import Setting from .server_socket import ThreadedTCPServer, ThreadedTCPRequestHandler - import threading server = ThreadedTCPServer((Setting.get_node_addr(), Setting.get_data_port_start()), ThreadedTCPRequestHandler, bind_and_activate=True) @@ -40,6 +39,9 @@ def run_msg_service(): # server.server_close() +def run_job_sevice(): + return None + if __name__ == '__main__': """ Entry point @@ -64,4 +66,3 @@ def run_msg_service(): # Binding commander to the rest service and enable REST service pool.submit(run_rest_service) - diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index ed30104..7856558 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -116,9 +116,11 @@ def new_job(request): new_item[Definition.Container.get_str_con_image_name()] = request.get(Definition.Container.get_str_con_image_name()) new_item['user_token'] = request.get(Definition.get_str_token()) new_item['time_to_live'] = request.get('ttl') - + new_item['start_time'] = request.get('start_time') LookUpTable.Jobs.__jobs[new_id] = new_item + return True + @staticmethod def update_job(request): job_id = request.get('job_id') diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 0ecfd62..ce43f9b 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -1,15 +1,15 @@ import falcon from .configuration import Setting -from harmonicIO.general.definition import Definition, CStatus, CRole +from harmonicIO.general.definition import Definition, CStatus, CRole, JobStatus from .messaging_system import MessagesQueue from harmonicIO.general.services import SysOut, Services as LService from .meta_table import LookUpTable from urllib.request import urlopen from urllib3.request import urlencode -from random.SystemRandom import choice + import json -import string +import JobQueue class RequestStatus(object): @@ -260,11 +260,6 @@ def on_get(self, req, res): res.status = falcon.HTTP_406 return - # get status of job - #if req.params['type'] == 'jobStatus': - - - return def on_post(self, req, res): @@ -281,31 +276,9 @@ def on_post(self, req, res): res.status = falcon.HTTP_406 return - # request to create new job - create ID for job, look for available container (prio 1 - worker with container available, prio 2 - worker with lowest cpu load), - if req.params['type'] == 'newJob': - - # create job ID - print("Requested new job!") - job_req = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist - - # below ID randomizer from: https://stackoverflow.com/questions/2257441/random-string-generation-with-upper-case-letters-and-digits-in-python - job_id = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(5)) - - # add job to table - job_req['job_id'] = job_id - job_req['job_status'] = "INIT" - job_req['ttl'] = 30 - job_req['start_time'] = LService.get_current_timestamp() - LookUpTable.Jobs.new_job - - - # prepare response - res.body = "Request received, allocating resources for job - Job ID: {}".format(job_id) - res.content_type = "String" - res.status = falcon.HTTP_200 - print(job_status) - - ## THIS PART SHOULD BE ASYNCHRONOUS + # request to create new job - create ID for job, add to lookup table, queue creation of the job + if req.params['type'] == 'new_job': + new_job(req, res) return @@ -335,6 +308,43 @@ def run(self): self.__server.serve_forever() +def new_job(req, res): + # create job ID + print("Requested new job!") + job_params = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist + + ### below ID randomizer from: https://stackoverflow.com/questions/2257441/random-string-generation-with-upper-case-letters-and-digits-in-python + def rand_id(N): + from random.SystemRandom import choice + import string + return ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(N)) + ### + + # make sure ID is new + job_id = rand_id(5) + while job_id in LookUpTable.Jobs.__jobs: + job_id = rand_id(5) + + # add job to table + job_req['job_id'] = job_id + job_req['job_status'] = JobStatus.INIT + job_req['ttl'] = 30 + job_req['start_time'] = LService.get_current_timestamp() + if not LookUpTable.Jobs.new_job(job_req): + SysOut.err_string("New job could not be added!") + res.body = "Could not create job." + res.content_type = "String" + res.status = falcon.HTTP_500 + return + + # prepare response + res.body = "Request received, allocating resources for job - Job ID: {}".format(job_id) + res.content_type = "String" + res.status = falcon.HTTP_200 + + # queue creation + JobQueue.queue_job(job_req) + def find_available_worker(job_req): # get server data data = LookUpTable.verbose() From 15c8932538a0d1e601454d97db15c258183a187f Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 13 Feb 2018 16:41:04 +0100 Subject: [PATCH 28/64] unsaved changes in meta table --- harmonicIO/master/meta_table.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 7856558..8b16181 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -136,9 +136,9 @@ def update_job(request): old_job = LookUpTable.Jobs.__jobs[] old_job['job_status'] = request.get('job_status') old_job[Definition.Container.get_str_con_image_name()] = request.get(Definition.Container.get_str_con_image_name()) -# old_job['user_token'] = request.get(Definition.get_str_token()) # should not be able to change user who requested job? - old_job['time_to_live'] = request.get('ttl') - old_job['start_time'] = request.get('time') + #old_job['user_token'] = request.get(Definition.get_str_token()) # should not be able to change user who requested job? + if 'ttl' in request: + old_job['time_to_live'] = request.get('ttl') host = {} host[Definition.get_str_node_port()] = request.get(Definition.get_str_node_port()) host[Definition.get_str_node_addr()] = request.get(Definition.get_str_node_addr()) From cf52da2e59d24820ebeb5648e453f620f4a5a7d2 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 13 Feb 2018 17:01:50 +0100 Subject: [PATCH 29/64] I broke something, fixing --- harmonicIO/master/__main__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/harmonicIO/master/__main__.py b/harmonicIO/master/__main__.py index d8e90ee..5ba272f 100644 --- a/harmonicIO/master/__main__.py +++ b/harmonicIO/master/__main__.py @@ -1,5 +1,5 @@ from harmonicIO.general.services import SysOut -import threading + """ Master entry point @@ -21,6 +21,7 @@ def run_msg_service(): """ from .configuration import Setting from .server_socket import ThreadedTCPServer, ThreadedTCPRequestHandler + import threading server = ThreadedTCPServer((Setting.get_node_addr(), Setting.get_data_port_start()), ThreadedTCPRequestHandler, bind_and_activate=True) From 422cf2d80f539579f70a4f9509bec79d18cc5bc0 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 13 Feb 2018 17:19:24 +0100 Subject: [PATCH 30/64] Maybe fixed broken? Added return statement to external metadata function new_job --- harmonicIO/master/meta_table.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 8b16181..20c9b31 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -105,11 +105,11 @@ def new_job(request): new_id = request.get('job_id') if not new_id: SysOut.warn_string("Couldn't create job, no ID provided!") - return None + return False if new_id in LookUpTable.Jobs.__jobs: SysOut.warn_string("Job already exists in system, can't create!") - return None + return False new_item['job_id'] = new_id new_item['job_status'] = request.get('job_status') @@ -158,7 +158,7 @@ def get_candidate_container(image_name): @staticmethod def new_job(request): - LookUpTable.Jobs.new_job(request) + return LookUpTable.Jobs.new_job(request) @staticmethod def update_job(request): From 934beea1da1547eacfd0af19f36f6cadc5174f39 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 13 Feb 2018 17:30:58 +0100 Subject: [PATCH 31/64] Maybe fixed broken? Imported jobqueue.JobQueue --- harmonicIO/master/rest_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index ce43f9b..7cf190c 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -9,7 +9,7 @@ from urllib3.request import urlencode import json -import JobQueue +from jobqueue import JobQueue class RequestStatus(object): From cc5e0a8dc4523577ca957e8139c55c8856b5129c Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 13 Feb 2018 17:49:07 +0100 Subject: [PATCH 32/64] Fixed typo in meta table update_job --- harmonicIO/master/meta_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 20c9b31..0b62e94 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -133,7 +133,7 @@ def update_job(request): SysOut.warn_string("Incorrect token, refusing update.") return None - old_job = LookUpTable.Jobs.__jobs[] + old_job = LookUpTable.Jobs.__jobs[job_id] old_job['job_status'] = request.get('job_status') old_job[Definition.Container.get_str_con_image_name()] = request.get(Definition.Container.get_str_con_image_name()) #old_job['user_token'] = request.get(Definition.get_str_token()) # should not be able to change user who requested job? From 2ed2602d5b04e71eaf162f5bd7ef28acd8c81c3b Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 13 Feb 2018 18:05:45 +0100 Subject: [PATCH 33/64] Fixed typos in rest service --- harmonicIO/general/definition.py | 2 +- harmonicIO/master/rest_service.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/harmonicIO/general/definition.py b/harmonicIO/general/definition.py index 92c1e18..aaecb43 100644 --- a/harmonicIO/general/definition.py +++ b/harmonicIO/general/definition.py @@ -23,7 +23,7 @@ class CTuple: RT = 3 -class JobStatus(object): +class JobStatus: INIT = 0 READY = 1 ACTIVE = 2 diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 7cf190c..6839155 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -355,9 +355,9 @@ def find_available_worker(job_req): for worker in data["WORKERS"]: if worker[Definition.REST.get_str_local_imgs()]: - for image in worker[Definition.REST.get_str_local_imgs()]: - if target_container in image.tags: - candidate = (worker["node_addr"], worker["load5"]) # create tuple with IP and load on worker with container + for image in worker[Definition.REST.get_str_local_imgs()]: + if target_container in image.tags: + candidate = (worker["node_addr"], worker["load5"]) # create tuple with IP and load on worker with container # find suitable worker by prio 1 if target_container in data["CONTAINERS"]: @@ -385,7 +385,7 @@ def find_available_worker(job_req): return candidates -def start_job(target_worker) +def start_job(target_worker): # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) From 4211e43fb8487258b4745350b15a6492e2b6b693 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 13 Feb 2018 18:30:41 +0100 Subject: [PATCH 34/64] solved issue that breaks master, JobQueue reserved name? --- harmonicIO/master/__main__.py | 3 --- harmonicIO/master/rest_service.py | 13 +++++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/harmonicIO/master/__main__.py b/harmonicIO/master/__main__.py index 5ba272f..0b05f81 100644 --- a/harmonicIO/master/__main__.py +++ b/harmonicIO/master/__main__.py @@ -40,9 +40,6 @@ def run_msg_service(): # server.server_close() -def run_job_sevice(): - return None - if __name__ == '__main__': """ Entry point diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 6839155..5d67f04 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -9,7 +9,7 @@ from urllib3.request import urlencode import json -from jobqueue import JobQueue +#from jobqueue import JobQueue class RequestStatus(object): @@ -315,17 +315,18 @@ def new_job(req, res): ### below ID randomizer from: https://stackoverflow.com/questions/2257441/random-string-generation-with-upper-case-letters-and-digits-in-python def rand_id(N): - from random.SystemRandom import choice + from random import SystemRandom import string - return ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(N)) + return ''.join(SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(N)) ### # make sure ID is new job_id = rand_id(5) - while job_id in LookUpTable.Jobs.__jobs: - job_id = rand_id(5) - +# while job_id in LookUpTable.Jobs.__jobs: +# job_id = rand_id(5) +# TODO: implement function to check if ID in jobs # add job to table + job_req = {} job_req['job_id'] = job_id job_req['job_status'] = JobStatus.INIT job_req['ttl'] = 30 From a714224122e749ebb4690b2afcbaf67375b58454 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Wed, 14 Feb 2018 18:48:52 +0000 Subject: [PATCH 35/64] Created job queue, added queue manager thread to main, creating container uses queue --- harmonicIO/general/definition.py | 12 ++-- harmonicIO/master/__main__.py | 15 ++++- harmonicIO/master/configuration.json | 2 +- harmonicIO/master/jobqueue.py | 44 ++++++++++++- harmonicIO/master/meta_table.py | 14 ++-- harmonicIO/master/rest_service.py | 97 ++++++++++++---------------- harmonicIO/worker/configuration.json | 4 +- 7 files changed, 115 insertions(+), 73 deletions(-) diff --git a/harmonicIO/general/definition.py b/harmonicIO/general/definition.py index aaecb43..32abfef 100644 --- a/harmonicIO/general/definition.py +++ b/harmonicIO/general/definition.py @@ -24,10 +24,10 @@ class CTuple: class JobStatus: - INIT = 0 - READY = 1 - ACTIVE = 2 - IDLE = 3 + INIT = "INITIALIZING" + READY = "READY" + ACTIVE = "ACTIVE" + IDLE = "IDLE" class Definition(object): @@ -91,10 +91,6 @@ def get_str_idle_time(): def get_str_data_port_range(): return "node_data_port_range" - @staticmethod - def get_str_idle_time(): - return "std_idle_time" - @staticmethod def get_str_token(): return "token" diff --git a/harmonicIO/master/__main__.py b/harmonicIO/master/__main__.py index 0b05f81..633b49d 100644 --- a/harmonicIO/master/__main__.py +++ b/harmonicIO/master/__main__.py @@ -1,10 +1,23 @@ from harmonicIO.general.services import SysOut - +from .jobqueue import JobManager """ Master entry point """ +def run_queue_manager(): + """ + Run job queue manager thread + can be several managers to manage large amount of queued jobs + """ + import threading + manager = JobManager() + manager_thread = threading.Thread(target=manager.job_queuer) + manager_thread.daemon = True + manager_thread.start() + + SysOut.out_string("Job queue started") + def run_rest_service(): """ diff --git a/harmonicIO/master/configuration.json b/harmonicIO/master/configuration.json index 5a43bfc..7e6ec84 100644 --- a/harmonicIO/master/configuration.json +++ b/harmonicIO/master/configuration.json @@ -1,6 +1,6 @@ { "node_name": "PE Master", - "master_addr": "192.168.0.84", + "master_addr": "192.168.1.9", "node_port": 8080, "node_data_port_range": [8090,8090], "std_idle_time": 5 diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index ead0367..64e085e 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -1,6 +1,44 @@ -import threading, queue +import queue +import json +from urllib.request import urlopen +from .meta_table import LookUpTable +from harmonicIO.general.definition import Definition, JobStatus + + +class JobManager(): + + def find_available_worker(self, container): + ## TODO: actually do stuff + return [('192.168.1.9', '0.05')] + + def start_job(self, target_worker, job_data): + # send request to worker + worker_url = "http://{}:8081/docker?token=None&command=create".format(target_worker) + req_data = bytes(json.dumps(job_data), 'utf-8') + resp = urlopen(worker_url, req_data) + + if resp.getcode() == 200: # container was created + return True + ## TODO: add port? + return False + + def job_queuer(self): + while True: + job_data = JobQueue.q.get() + print("Got some work!") + candidates = self.find_available_worker(job_data.get('c_name')) + worker_ip = candidates[0][0] + if self.start_job(worker_ip, job_data): + job_data[Definition.get_str_node_port()] = 1337 + job_data[Definition.get_str_node_addr()] = worker_ip + job_data['job_status'] = JobStatus.READY + LookUpTable.Jobs.update_job(job_data) ## TODO: double check nothing wrong gets updated + print("Completed job!") + JobQueue.q.task_done() class JobQueue(object): + q = queue.Queue() - def queue_job(job_data): - print("Not yet implemented") + @staticmethod + def queue_new_job(job_data): + JobQueue.q.put(job_data) \ No newline at end of file diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 0b62e94..ad87565 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -126,12 +126,12 @@ def update_job(request): job_id = request.get('job_id') if not job_id in LookUpTable.Jobs.__jobs: SysOut.warn_string("Couldn't update job, no existing job matching ID!") - return None + return False - tkn = request.get(Definitions.get_str_token()) + tkn = request.get(Definition.get_str_token()) if not tkn == LookUpTable.Jobs.__jobs[job_id]['user_token']: SysOut.warn_string("Incorrect token, refusing update.") - return None + return False old_job = LookUpTable.Jobs.__jobs[job_id] old_job['job_status'] = request.get('job_status') @@ -144,6 +144,8 @@ def update_job(request): host[Definition.get_str_node_addr()] = request.get(Definition.get_str_node_addr()) old_job['host_container'] = host + return True + @staticmethod def verbose(): return LookUpTable.Jobs.__jobs @@ -162,7 +164,11 @@ def new_job(request): @staticmethod def update_job(request): - LookUpTable.Jobs.update_job(request) + return LookUpTable.Jobs.update_job(request) + + @staticmethod + def poll_id(id): + return id in LookUpTable.Jobs.verbose() @staticmethod def verbose(): diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 5d67f04..e16e727 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -9,7 +9,13 @@ from urllib3.request import urlencode import json -#from jobqueue import JobQueue +from .jobqueue import JobQueue + +def format_response_string(res, http_code, msg): + res.body = msg + '\n' + res.status = http_code + res.content_type = "String" + return res class RequestStatus(object): @@ -21,22 +27,15 @@ def on_get(self, req, res): GET: /status?token={None} """ if not Definition.get_str_token() in req.params: - res.body = "Token is required." - res.content_type = "String" - res.status = falcon.HTTP_401 + format_response_string(res, falcon.HTTP_401, "Token is required") return if req.params[Definition.get_str_token()] == Setting.get_token(): - result = LService.get_machine_status(Setting, CRole.MASTER) - - res.body = str(result) - res.content_type = "String" - res.status = falcon.HTTP_200 + format_response_string(res, falcon.HTTP_200, str(result)) + else: - res.body = "Invalid token ID." - res.content_type = "String" - res.status = falcon.HTTP_401 + format_response_string(res, falcon.HTTP_401,"Invalid token ID") def on_put(self, req, res): """ @@ -249,18 +248,25 @@ def __init__(self): def on_get(self, req, res): # check token and request type is provided if not Definition.get_str_token() in req.params: - res.body = "Token is required." - res.content_type = "String" - res.status = falcon.HTTP_401 + format_response_string(res, falcon.HTTP_401, "Token required.") return if not "type" in req.params: - res.body = "No command specified." - res.content_type = "String" - res.status = falcon.HTTP_406 + format_response_string(res, falcon.HTTP_406, "Command not specified.") return - return + if req.params['type'] == "poll_job": + id = req.params.get('job_id') + if not id in LookUpTable.Jobs.verbose(): + format_response_string(res, falcon.HTTP_404, "Specified job not available.") + return + + job = LookUpTable.Jobs.verbose() + if job: + stat = str(job[id].get('job_status')) + format_response_string(res, falcon.HTTP_200, ("Job status: " + stat)) + + return def on_post(self, req, res): # check token and request type is provided @@ -278,7 +284,14 @@ def on_post(self, req, res): # request to create new job - create ID for job, add to lookup table, queue creation of the job if req.params['type'] == 'new_job': - new_job(req, res) + job = new_job(req) + if not job: + SysOut.err_string("New job could not be added!") + format_response_string(res, falcon.HTTP_500, "Could not create job.") + return + job_status = job.get('job_status') + format_response_string(res, falcon.HTTP_200, "Job request received, container status: {}\nJob ID: {}".format(job_status, job.get('job_id'))) + return return @@ -308,11 +321,7 @@ def run(self): self.__server.serve_forever() -def new_job(req, res): - # create job ID - print("Requested new job!") - job_params = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of parameters if they exist - +def new_job(req): ### below ID randomizer from: https://stackoverflow.com/questions/2257441/random-string-generation-with-upper-case-letters-and-digits-in-python def rand_id(N): from random import SystemRandom @@ -320,31 +329,24 @@ def rand_id(N): return ''.join(SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(N)) ### - # make sure ID is new + # create job ID, make sure ID is new job_id = rand_id(5) -# while job_id in LookUpTable.Jobs.__jobs: -# job_id = rand_id(5) -# TODO: implement function to check if ID in jobs + while LookUpTable.poll_id(job_id): + job_id = rand_id(5) + # add job to table - job_req = {} + job_req = req.params job_req['job_id'] = job_id job_req['job_status'] = JobStatus.INIT job_req['ttl'] = 30 job_req['start_time'] = LService.get_current_timestamp() if not LookUpTable.Jobs.new_job(job_req): - SysOut.err_string("New job could not be added!") - res.body = "Could not create job." - res.content_type = "String" - res.status = falcon.HTTP_500 - return - - # prepare response - res.body = "Request received, allocating resources for job - Job ID: {}".format(job_id) - res.content_type = "String" - res.status = falcon.HTTP_200 + return None # queue creation - JobQueue.queue_job(job_req) + JobQueue.queue_new_job(job_req) + + return job_req def find_available_worker(job_req): # get server data @@ -386,19 +388,6 @@ def find_available_worker(job_req): return candidates -def start_job(target_worker): - - # send request to worker - worker_url = "http://{}:8081/docker?token=None&command=create".format(candidates[0][0]) - print(worker_url, '\n', job_req, '\n', bytes(json.dumps(job_req), 'utf-8')) - with urlopen(worker_url, bytes(json.dumps(job_req), 'utf-8')) as response: - html = response.read() - - worker_response = html.decode('UTF-8') - print(worker_response) - job_status = "ACTIVE" - print(job_status) - def get_html_form(worker, msg, containers, tuples): html = """ diff --git a/harmonicIO/worker/configuration.json b/harmonicIO/worker/configuration.json index b177066..f80eee7 100644 --- a/harmonicIO/worker/configuration.json +++ b/harmonicIO/worker/configuration.json @@ -1,9 +1,9 @@ { "node_name": "PE Worker", "node_port": 8081, - "node_internal_addr": "192.168.0.84", + "node_internal_addr": "192.168.1.9", "node_external_addr": "None", - "master_addr": "192.168.0.84", + "master_addr": "192.168.1.9", "master_port": 8080, "node_data_port_range": [9000, 9010], "std_idle_time": 5 From 4325a3fd5d013eeef5f66a1624061cea3280ab18 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Thu, 15 Feb 2018 16:06:37 +0000 Subject: [PATCH 36/64] bugfix and refactors, actually started job queuer thread, start job properly updates job status --- harmonicIO/general/definition.py | 1 + harmonicIO/master/__main__.py | 3 ++ harmonicIO/master/jobqueue.py | 58 ++++++++++++++++++++++++++----- harmonicIO/master/rest_service.py | 20 +++++------ harmonicIO/worker/rest_service.py | 1 - 5 files changed, 64 insertions(+), 19 deletions(-) diff --git a/harmonicIO/general/definition.py b/harmonicIO/general/definition.py index 32abfef..924357d 100644 --- a/harmonicIO/general/definition.py +++ b/harmonicIO/general/definition.py @@ -28,6 +28,7 @@ class JobStatus: READY = "READY" ACTIVE = "ACTIVE" IDLE = "IDLE" + FAILED = "FAILED" class Definition(object): diff --git a/harmonicIO/master/__main__.py b/harmonicIO/master/__main__.py index 633b49d..1e73464 100644 --- a/harmonicIO/master/__main__.py +++ b/harmonicIO/master/__main__.py @@ -77,3 +77,6 @@ def run_msg_service(): # Binding commander to the rest service and enable REST service pool.submit(run_rest_service) + + # Run job queue manager thread + pool.submit(run_queue_manager) diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index 64e085e..81076cd 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -3,7 +3,47 @@ from urllib.request import urlopen from .meta_table import LookUpTable from harmonicIO.general.definition import Definition, JobStatus +from harmonicIO.general.services import SysOut +def find_available_worker(job_req): + # get server data + data = LookUpTable.verbose() + data['MSG'] = MessagesQueue.verbose() + candidates = [] + target_container = job_req[Definition.Container.get_str_con_image_name()] + + for worker in data["WORKERS"]: + if worker[Definition.REST.get_str_local_imgs()]: + + for image in worker[Definition.REST.get_str_local_imgs()]: + if target_container in image.tags: + candidate = (worker["node_addr"], worker["load5"]) # create tuple with IP and load on worker with container + + # find suitable worker by prio 1 + if target_container in data["CONTAINERS"]: + print("Looking for container called " + target_container) + for container in data["CONTAINERS"][target_container]: + candidate = ((container["batch_addr"], data["WORKERS"][container["batch_addr"]]["load5"])) # create tuple with IP and load on worker with container + if candidate[1] < 0.5: # only add candidate if worker load less than 50% + candidates.append(candidate) + + + # find suitable worker by prio 2 + elif data["WORKERS"]: + for worker in data["WORKERS"]: + + if candidate[1] < 0.5: + candidates.append(candidate) + + # no suitable worker available + else: + return None + + candidates.sort(key=lambda index: index[1]) # sort candidate workers on load (avg. load last 5 minutes) + print('Candidates:\n' + candidates) + print(str(candidates[0]) + " has least load, sending request here!") + + return candidates class JobManager(): @@ -15,7 +55,7 @@ def start_job(self, target_worker, job_data): # send request to worker worker_url = "http://{}:8081/docker?token=None&command=create".format(target_worker) req_data = bytes(json.dumps(job_data), 'utf-8') - resp = urlopen(worker_url, req_data) + resp = urlopen(worker_url, req_data) # NOTE: might need increase in timeout to allow download of large container images!!! if resp.getcode() == 200: # container was created return True @@ -25,16 +65,18 @@ def start_job(self, target_worker, job_data): def job_queuer(self): while True: job_data = JobQueue.q.get() - print("Got some work!") candidates = self.find_available_worker(job_data.get('c_name')) worker_ip = candidates[0][0] - if self.start_job(worker_ip, job_data): - job_data[Definition.get_str_node_port()] = 1337 - job_data[Definition.get_str_node_addr()] = worker_ip - job_data['job_status'] = JobStatus.READY + try: + if self.start_job(worker_ip, job_data): + job_data[Definition.get_str_node_port()] = 1337 + job_data[Definition.get_str_node_addr()] = worker_ip + job_data['job_status'] = JobStatus.READY + except: + job_data['job_status'] = JobStatus.FAILED + finally: LookUpTable.Jobs.update_job(job_data) ## TODO: double check nothing wrong gets updated - print("Completed job!") - JobQueue.q.task_done() + JobQueue.q.task_done() class JobQueue(object): q = queue.Queue() diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index e16e727..efa6776 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -283,8 +283,9 @@ def on_post(self, req, res): return # request to create new job - create ID for job, add to lookup table, queue creation of the job + job_params = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of body data if they exist if req.params['type'] == 'new_job': - job = new_job(req) + job = new_job(job_params) if not job: SysOut.err_string("New job could not be added!") format_response_string(res, falcon.HTTP_500, "Could not create job.") @@ -321,7 +322,7 @@ def run(self): self.__server.serve_forever() -def new_job(req): +def new_job(job_params): ### below ID randomizer from: https://stackoverflow.com/questions/2257441/random-string-generation-with-upper-case-letters-and-digits-in-python def rand_id(N): from random import SystemRandom @@ -335,18 +336,17 @@ def rand_id(N): job_id = rand_id(5) # add job to table - job_req = req.params - job_req['job_id'] = job_id - job_req['job_status'] = JobStatus.INIT - job_req['ttl'] = 30 - job_req['start_time'] = LService.get_current_timestamp() - if not LookUpTable.Jobs.new_job(job_req): + job_params['job_id'] = job_id + job_params['job_status'] = JobStatus.INIT + job_params['ttl'] = 30 + job_params['start_time'] = LService.get_current_timestamp() + if not LookUpTable.Jobs.new_job(job_params): return None # queue creation - JobQueue.queue_new_job(job_req) + JobQueue.queue_new_job(job_params) - return job_req + return job_params def find_available_worker(job_req): # get server data diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 68e0d45..2ed35d9 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -56,7 +56,6 @@ def on_post(self, req, res): if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_create(): # Unpack the posted data raw = req.stream.read(req.content_length or 0) - print(raw, str(raw, 'utf-8')) data = json.loads(str(raw, 'utf-8')) # create dict of parameters if they exist if not data[Definition.Container.get_str_con_image_name()]: From 6de996a84754c27863074bc6f6222d3509936ae3 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Fri, 16 Feb 2018 14:36:06 +0000 Subject: [PATCH 37/64] worker updates local images correctly --- harmonicIO/master/jobqueue.py | 4 ++-- harmonicIO/worker/docker_master.py | 8 +++++++- harmonicIO/worker/rest_service.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index 81076cd..fed6633 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -5,7 +5,7 @@ from harmonicIO.general.definition import Definition, JobStatus from harmonicIO.general.services import SysOut -def find_available_worker(job_req): +def asdasd(job_req): # get server data data = LookUpTable.verbose() data['MSG'] = MessagesQueue.verbose() @@ -49,7 +49,7 @@ class JobManager(): def find_available_worker(self, container): ## TODO: actually do stuff - return [('192.168.1.9', '0.05')] + return [('192.168.1.8', '0.05')] def start_job(self, target_worker, job_data): # send request to worker diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 6ad8860..2086a12 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -71,7 +71,13 @@ def get_container_status(input): return res def get_local_images(self): - self.__client.images.list() + # get a list of all tags of all locally available images on this machine + imgs = self.__client.images.list() + local_imgs = [] + for img in imgs: + local_imgs += img.tags + + return local_imgs def run_container(self, container_name): diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 2ed35d9..f0a1753 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -56,7 +56,7 @@ def on_post(self, req, res): if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_create(): # Unpack the posted data raw = req.stream.read(req.content_length or 0) - data = json.loads(str(raw, 'utf-8')) # create dict of parameters if they exist + data = json.loads(str(raw, 'utf-8')) # create dict of body data if it exists if not data[Definition.Container.get_str_con_image_name()]: res.body = "Required parameters are not supplied!" From e32f4638d41c5bd85eeaddf9105fff8e9c6be351 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Fri, 16 Feb 2018 16:44:11 +0000 Subject: [PATCH 38/64] job queuer searches for appropriate worker node for putting container --- harmonicIO/master/jobqueue.py | 66 ++++++++++++----------------------- 1 file changed, 22 insertions(+), 44 deletions(-) diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index fed6633..08ed681 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -5,51 +5,29 @@ from harmonicIO.general.definition import Definition, JobStatus from harmonicIO.general.services import SysOut -def asdasd(job_req): - # get server data - data = LookUpTable.verbose() - data['MSG'] = MessagesQueue.verbose() - candidates = [] - target_container = job_req[Definition.Container.get_str_con_image_name()] - - for worker in data["WORKERS"]: - if worker[Definition.REST.get_str_local_imgs()]: - - for image in worker[Definition.REST.get_str_local_imgs()]: - if target_container in image.tags: - candidate = (worker["node_addr"], worker["load5"]) # create tuple with IP and load on worker with container - - # find suitable worker by prio 1 - if target_container in data["CONTAINERS"]: - print("Looking for container called " + target_container) - for container in data["CONTAINERS"][target_container]: - candidate = ((container["batch_addr"], data["WORKERS"][container["batch_addr"]]["load5"])) # create tuple with IP and load on worker with container - if candidate[1] < 0.5: # only add candidate if worker load less than 50% - candidates.append(candidate) - - - # find suitable worker by prio 2 - elif data["WORKERS"]: - for worker in data["WORKERS"]: - - if candidate[1] < 0.5: - candidates.append(candidate) - - # no suitable worker available - else: - return None - - candidates.sort(key=lambda index: index[1]) # sort candidate workers on load (avg. load last 5 minutes) - print('Candidates:\n' + candidates) - print(str(candidates[0]) + " has least load, sending request here!") - - return candidates - class JobManager(): def find_available_worker(self, container): - ## TODO: actually do stuff - return [('192.168.1.8', '0.05')] + candidates = [] + workers = LookUpTable.Workers.verbose() + + if not workers: + return None + + # loop through workers and make tuples of worker IP, load and if requested container is available locally + for worker in workers: + curr_worker = workers[worker] + if container in curr_worker[Definition.REST.get_str_local_imgs()]: + candidates.append((curr_worker[Definition.get_str_node_addr()], curr_worker[Definition.get_str_load5()], True)) + else: + candidates.append((curr_worker[Definition.get_str_node_addr()], curr_worker[Definition.get_str_load5()], False)) + + candidates.sort(key=lambda x: (-x[2], x[1])) # sort candidate workers first on availability of image, then on load (avg load last 5 mins) + for candidate in candidates: + if float(candidate[1]) < 0.5: + return candidate + + return None def start_job(self, target_worker, job_data): # send request to worker @@ -65,9 +43,9 @@ def start_job(self, target_worker, job_data): def job_queuer(self): while True: job_data = JobQueue.q.get() - candidates = self.find_available_worker(job_data.get('c_name')) - worker_ip = candidates[0][0] + target = self.find_available_worker(job_data.get(Definition.Container.get_str_con_image_name())) try: + worker_ip = target[0] if self.start_job(worker_ip, job_data): job_data[Definition.get_str_node_port()] = 1337 job_data[Definition.get_str_node_addr()] = worker_ip From 0750c817089e6a75c6e88baf2fa6e874f5e955a8 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Fri, 23 Feb 2018 16:04:44 +0000 Subject: [PATCH 39/64] removed uneccessary TODO's, cleanup in Jobs in metatable --- harmonicIO/master/jobqueue.py | 5 +---- harmonicIO/master/meta_table.py | 5 ----- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index 08ed681..c001fdf 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -37,7 +37,6 @@ def start_job(self, target_worker, job_data): if resp.getcode() == 200: # container was created return True - ## TODO: add port? return False def job_queuer(self): @@ -47,13 +46,11 @@ def job_queuer(self): try: worker_ip = target[0] if self.start_job(worker_ip, job_data): - job_data[Definition.get_str_node_port()] = 1337 - job_data[Definition.get_str_node_addr()] = worker_ip job_data['job_status'] = JobStatus.READY except: job_data['job_status'] = JobStatus.FAILED finally: - LookUpTable.Jobs.update_job(job_data) ## TODO: double check nothing wrong gets updated + LookUpTable.Jobs.update_job(job_data) JobQueue.q.task_done() class JobQueue(object): diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index ad87565..cde3a11 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -135,14 +135,9 @@ def update_job(request): old_job = LookUpTable.Jobs.__jobs[job_id] old_job['job_status'] = request.get('job_status') - old_job[Definition.Container.get_str_con_image_name()] = request.get(Definition.Container.get_str_con_image_name()) #old_job['user_token'] = request.get(Definition.get_str_token()) # should not be able to change user who requested job? if 'ttl' in request: old_job['time_to_live'] = request.get('ttl') - host = {} - host[Definition.get_str_node_port()] = request.get(Definition.get_str_node_port()) - host[Definition.get_str_node_addr()] = request.get(Definition.get_str_node_addr()) - old_job['host_container'] = host return True From 7705efe9d7896eda08a4691a35d717f2396b072a Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Fri, 23 Feb 2018 16:27:28 +0000 Subject: [PATCH 40/64] located where to implement garbage collection, see TODO --- harmonicIO/master/rest_service.py | 1 + harmonicIO/worker/docker_master.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index efa6776..9568f36 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -52,6 +52,7 @@ def on_put(self, req, res): data = eval(raw) LookUpTable.update_worker(data) + ## TODO: make sure also available containers are updated! SysOut.debug_string("Update worker status ({0})".format(data[Definition.get_str_node_name()])) res.body = "Okay" diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 2086a12..8c7cf59 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -63,7 +63,7 @@ def get_container_status(input): return res res = [] - for item in self.__client.containers.list(): + for item in self.__client.containers.list(): ## TODO: add all=True? res.append(get_container_status(item)) # To print all logs: #print(item.logs(stdout=True, stderr=True)) From 37f7d01a46a6cb7643bd0b80cbd64be0402dca37 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Mon, 26 Feb 2018 16:09:18 +0000 Subject: [PATCH 41/64] small bugfixes, added api call to notify master that container is exiting, double hop via worker api call --- harmonicIO/general/definition.py | 8 ++++ harmonicIO/master/configuration.json | 2 +- harmonicIO/master/meta_table.py | 21 ++++++++- harmonicIO/master/rest_service.py | 66 ++++++++-------------------- harmonicIO/master/server_socket.py | 4 +- harmonicIO/worker/__main__.py | 3 ++ harmonicIO/worker/configuration.json | 4 +- harmonicIO/worker/rest_service.py | 40 ++++++++++++++++- 8 files changed, 93 insertions(+), 55 deletions(-) diff --git a/harmonicIO/general/definition.py b/harmonicIO/general/definition.py index 924357d..d887c88 100644 --- a/harmonicIO/general/definition.py +++ b/harmonicIO/general/definition.py @@ -180,6 +180,10 @@ def get_str_stream_req(): def get_str_msg_query(): return "messagesQuery" + @staticmethod + def get_str_job_mgr(): + return "jobRequest" + @staticmethod def get_str_reg_func(): return "registeredFunctions" @@ -289,6 +293,10 @@ def get_str_status(): def get_str_query(): return "query" + @staticmethod + def get_str_finished(): + return "finished" + class HDE(object): @staticmethod diff --git a/harmonicIO/master/configuration.json b/harmonicIO/master/configuration.json index 7e6ec84..7bc2373 100644 --- a/harmonicIO/master/configuration.json +++ b/harmonicIO/master/configuration.json @@ -1,6 +1,6 @@ { "node_name": "PE Master", - "master_addr": "192.168.1.9", + "master_addr": "192.168.1.12", "node_port": 8080, "node_data_port_range": [8090,8090], "std_idle_time": 5 diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index cde3a11..002d080 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -39,7 +39,7 @@ def get_container_object(req): ret[Definition.REST.Batch.get_str_batch_status()] = int(req.params[Definition.REST.Batch.get_str_batch_status()]) ret[Definition.Container.get_str_con_image_name()] = req.params[Definition.Container.get_str_con_image_name()].strip() ret[Definition.get_str_last_update()] = Services.get_current_timestamp() - + ## TODO: add s_id return ret @staticmethod @@ -67,6 +67,21 @@ def get_candidate_container(image_name): return None + @staticmethod + def del_container(container_name, short_id): + conts = LookUpTable.Containers.__containers.get(container_name) + if not conts: + return False + else: + # conts is list of containers with same c_name + + # List filter code based on: https://stackoverflow.com/questions/1235618/python-remove-dictionary-from-list + # Removes + conts[:] = [con for con in conts if con.get(Definition.Container.Status.get_str_sid) != short_id] + + + + class Tuples(object): __tuples = {} @@ -165,6 +180,10 @@ def update_job(request): def poll_id(id): return id in LookUpTable.Jobs.verbose() + @staticmethod + def remove_container(c_name, csid): + return LookUpTable.Containers.del_container(c_name, csid) + @staticmethod def verbose(): ret = dict() diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 9568f36..9393897 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -47,12 +47,22 @@ def on_put(self, req, res): res.status = falcon.HTTP_401 return + if Definition.Docker.get_str_finished() in req.params: + # a container is shutting down, update containers + if LookUpTable.remove_container( + req.params.get(Definition.Container.get_str_con_image_name()), + req.params.get(Definition.Container.Status.get_str_sid()) + ): + format_response_string(res, falcon.HTTP_200, "Container successfully removed") + else: + format_response_string(res, falcon.HTTP_400, "Could not remove container from table!") + return + + if req.params[Definition.get_str_token()] == Setting.get_token(): - raw = str(req.stream.read(), 'UTF-8') - data = eval(raw) + data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) LookUpTable.update_worker(data) - ## TODO: make sure also available containers are updated! SysOut.debug_string("Update worker status ({0})".format(data[Definition.get_str_node_name()])) res.body = "Okay" @@ -63,6 +73,7 @@ def on_put(self, req, res): res.content_type = "String" res.status = falcon.HTTP_401 + return class MessageStreaming(object): def __init__(self): @@ -242,7 +253,7 @@ def on_get(self, req, res): res.content_type = "String" res.status = falcon.HTTP_200 -class ClientManager(object): +class JobManager(object): def __init__(self): pass @@ -286,7 +297,7 @@ def on_post(self, req, res): # request to create new job - create ID for job, add to lookup table, queue creation of the job job_params = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of body data if they exist if req.params['type'] == 'new_job': - job = new_job(job_params) + job = new_job(job_params) # attempt to create new job from provided parameters if not job: SysOut.err_string("New job could not be added!") format_response_string(res, falcon.HTTP_500, "Could not create job.") @@ -312,8 +323,8 @@ def __init__(self): # Add route for msg query api.add_route('/' + Definition.REST.get_str_msg_query(), MessagesQuery()) - # Add route for client manager - api.add_route('/' + 'clientManagement', ClientManager()) + # Add route for job manager + api.add_route('/' + Definition.REST.get_str_job_mgr(), JobManager()) # Establishing a REST server self.__server = make_server(Setting.get_node_addr(), Setting.get_node_port(), api) @@ -349,47 +360,6 @@ def rand_id(N): return job_params -def find_available_worker(job_req): - # get server data - data = LookUpTable.verbose() - data['MSG'] = MessagesQueue.verbose() - candidates = [] - target_container = job_req[Definition.Container.get_str_con_image_name()] - - for worker in data["WORKERS"]: - if worker[Definition.REST.get_str_local_imgs()]: - - for image in worker[Definition.REST.get_str_local_imgs()]: - if target_container in image.tags: - candidate = (worker["node_addr"], worker["load5"]) # create tuple with IP and load on worker with container - - # find suitable worker by prio 1 - if target_container in data["CONTAINERS"]: - print("Looking for container called " + target_container) - for container in data["CONTAINERS"][target_container]: - candidate = ((container["batch_addr"], data["WORKERS"][container["batch_addr"]]["load5"])) # create tuple with IP and load on worker with container - if candidate[1] < 0.5: # only add candidate if worker load less than 50% - candidates.append(candidate) - - - # find suitable worker by prio 2 - elif data["WORKERS"]: - for worker in data["WORKERS"]: - - if candidate[1] < 0.5: - candidates.append(candidate) - - # no suitable worker available - else: - return None - - candidates.sort(key=lambda index: index[1]) # sort candidate workers on load (avg. load last 5 minutes) - print('Candidates:\n' + candidates) - print(str(candidates[0]) + " has least load, sending request here!") - - return candidates - - def get_html_form(worker, msg, containers, tuples): html = """ diff --git a/harmonicIO/master/server_socket.py b/harmonicIO/master/server_socket.py index afeefd3..57f94d8 100644 --- a/harmonicIO/master/server_socket.py +++ b/harmonicIO/master/server_socket.py @@ -1,6 +1,6 @@ import socketserver from .messaging_system import MessagesQueue - +from harmonicIO.general.services import SysOut class ThreadedTCPServer(socketserver.ThreadingMixIn, socketserver.TCPServer): """ @@ -42,4 +42,4 @@ def handle(self): except: from harmonicIO.general.services import Services - Services.e_print("Insufficient memory for storing g object.") + SysOut.err_string("Insufficient memory for storing g object.") diff --git a/harmonicIO/worker/__main__.py b/harmonicIO/worker/__main__.py index 36a10c2..e5c6179 100644 --- a/harmonicIO/worker/__main__.py +++ b/harmonicIO/worker/__main__.py @@ -16,6 +16,7 @@ def run_rest_service(): rest = RESTService() rest.run() +## TODO: add gc thread def update_worker_status(): """ @@ -84,3 +85,5 @@ def update_worker_status(): # Update the worker status pool.submit(update_worker_status) + + ##TODO: ACTUALLY PUT THE DAMN THREAD START IN HERE LOL diff --git a/harmonicIO/worker/configuration.json b/harmonicIO/worker/configuration.json index f80eee7..cf0d82c 100644 --- a/harmonicIO/worker/configuration.json +++ b/harmonicIO/worker/configuration.json @@ -1,9 +1,9 @@ { "node_name": "PE Worker", "node_port": 8081, - "node_internal_addr": "192.168.1.9", + "node_internal_addr": "192.168.1.12", "node_external_addr": "None", - "master_addr": "192.168.1.9", + "master_addr": "192.168.1.12", "master_port": 8080, "node_data_port_range": [9000, 9010], "std_idle_time": 5 diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index f0a1753..6c6e3a1 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -5,6 +5,27 @@ from harmonicIO.general.definition import Definition, CRole import json + +# function that sends request to master to notify exiting of a container +def notify_master_container_finished(csid): + from urllib.request import urlopen, Request + + notify_url = "http://{}:{}/{}?token=None&{}={}".format( + Setting.get_master_addr, + Setting.get_master_port, + Definition.REST.get_str_status(), + Definition.Docker.get_str_finished(), + csid + ) + req = Request(url=notify_url, method='PUT') + resp = urlopen(req) + + if resp.getcode() == 200: + # container was removed on master + return True + return False + + class ContainerService(object): def __init__(self): pass @@ -31,8 +52,25 @@ def on_get(self, req, res): res.body = str(body) res.content_type = "String" res.status = falcon.HTTP_200 + return + + # Container is exiting, notify master to update + if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_finished(): + res.content_type = "String" + + container_short_id = req.params.get(Definition.Container.Status.get_str_sid) + if container_short_id: + if not notify_master_container_finished(container_short_id): + res.body = "Could not find requested container running." + res.status = falcon.HTTP_404 + else: + res.body = "ACK: Container terminated, master notified." + res.status = falcon.HTTP_200 + else: + res.body = "Container short id required" + res.status = falcon.HTTP_400 + - ## TODO: add list containers def on_post(self, req, res): """ From 4252532138bb27509151e6cfbc730a139a4c1ce3 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Tue, 27 Feb 2018 11:18:03 +0000 Subject: [PATCH 42/64] test --- harmonicIO/master/configuration.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/configuration.json b/harmonicIO/master/configuration.json index 7bc2373..24f4bc0 100644 --- a/harmonicIO/master/configuration.json +++ b/harmonicIO/master/configuration.json @@ -1,6 +1,6 @@ { "node_name": "PE Master", - "master_addr": "192.168.1.12", + "master_addr": "192.168.1.11", "node_port": 8080, "node_data_port_range": [8090,8090], "std_idle_time": 5 From 9e7572206c595f543b7fe24498a2dd848d28950c Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Tue, 27 Feb 2018 11:19:47 +0000 Subject: [PATCH 43/64] test successful! --- harmonicIO/master/configuration.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/configuration.json b/harmonicIO/master/configuration.json index 24f4bc0..7bc2373 100644 --- a/harmonicIO/master/configuration.json +++ b/harmonicIO/master/configuration.json @@ -1,6 +1,6 @@ { "node_name": "PE Master", - "master_addr": "192.168.1.11", + "master_addr": "192.168.1.12", "node_port": 8080, "node_data_port_range": [8090,8090], "std_idle_time": 5 From dd5c3c50ad532d3ca22b435469493dc66d390c8e Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Wed, 28 Feb 2018 14:44:50 +0000 Subject: [PATCH 44/64] added garbage collector of exited containers to worker, adding short id of container to master info --- harmonicIO/master/jobqueue.py | 3 ++- harmonicIO/worker/__main__.py | 25 ++++++++++++++++------ harmonicIO/worker/docker_master.py | 17 +++++++++++++-- harmonicIO/worker/docker_service.py | 4 ++++ harmonicIO/worker/garbage_collector.py | 29 ++++++++++++++++++++++++++ harmonicIO/worker/rest_service.py | 2 +- 6 files changed, 70 insertions(+), 10 deletions(-) create mode 100644 harmonicIO/worker/garbage_collector.py diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index c001fdf..9af9408 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -36,7 +36,8 @@ def start_job(self, target_worker, job_data): resp = urlopen(worker_url, req_data) # NOTE: might need increase in timeout to allow download of large container images!!! if resp.getcode() == 200: # container was created - return True + SysOut.debug_string(resp.read()) + return resp.read() return False def job_queuer(self): diff --git a/harmonicIO/worker/__main__.py b/harmonicIO/worker/__main__.py index e5c6179..5cb2bcf 100644 --- a/harmonicIO/worker/__main__.py +++ b/harmonicIO/worker/__main__.py @@ -6,6 +6,8 @@ from .configuration import Setting from harmonicIO.general.services import SysOut, Services from harmonicIO.general.definition import Definition, CRole +from .garbage_collector import GarbageCollector +import json def run_rest_service(): @@ -16,7 +18,15 @@ def run_rest_service(): rest = RESTService() rest.run() -## TODO: add gc thread + +def start_gc_thread(): + carbage_collector = GarbageCollector(10) + gc_thread = threading.Thread(carbage_collector.collect_exited_containers()) + gc_thread.daemon = True + gc_thread.start() + + SysOut.out_string("Garbage collector started") + def update_worker_status(): """ @@ -28,16 +38,18 @@ def update_worker_status(): Get machine status by calling a unix command and fetch for load average """ - s_content = Services.get_machine_status(Setting, CRole.WORKER) - s_content[Definition.REST.get_str_docker()] = DockerService.get_containers_status() - s_content[Definition.REST.get_str_local_imgs()] = DockerService.get_local_images() + content = Services.get_machine_status(Setting, CRole.WORKER) + content[Definition.REST.get_str_docker()] = DockerService.get_containers_status() + content[Definition.REST.get_str_local_imgs()] = DockerService.get_local_images() + + s_content = bytes(json.dumps(content), 'utf-8') html = urllib3.PoolManager() try: r = html.request('PUT', Definition.Master.get_str_check_master(Setting.get_master_addr(), Setting.get_master_port(), Setting.get_token()), - body=str(s_content)) + body=s_content) if r.status != 200: SysOut.err_string("Cannot update worker status to the master!") @@ -86,4 +98,5 @@ def update_worker_status(): # Update the worker status pool.submit(update_worker_status) - ##TODO: ACTUALLY PUT THE DAMN THREAD START IN HERE LOL + # Start garbage collector thread + pool.submit(start_gc_thread) \ No newline at end of file diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 8c7cf59..1a4c7a0 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -4,6 +4,8 @@ from harmonicIO.general.definition import CStatus, Definition from harmonicIO.general.services import SysOut +from docker.errors import APIError +from requests.exceptions import HTTPError class ChannelStatus(object): def __init__(self, port): @@ -63,7 +65,7 @@ def get_container_status(input): return res res = [] - for item in self.__client.containers.list(): ## TODO: add all=True? + for item in self.__client.containers.list(all=True): res.append(get_container_status(item)) # To print all logs: #print(item.logs(stdout=True, stderr=True)) @@ -79,6 +81,16 @@ def get_local_images(self): return local_imgs + def delete_container(self, cont_shortid): + # remove a container from the worker by provided short id, only removes exited containers + try: + self.__client.containers.get(cont_shortid).remove() + return True + except (ApiError, HTTPError) as e: + SysOut.err_string("Could not remove requested container, exception:\n{}".format(e)) + return False + + def run_container(self, container_name): def get_ports_setting(expose, ports): @@ -119,7 +131,8 @@ def get_env_setting(expose, a_port): if res: SysOut.out_string("Container " + container_name + " is created!") SysOut.out_string("Container " + container_name + " is " + res.status + " ") - return True + # return short id of container + return res.short_id else: SysOut.out_string("Container " + container_name + " cannot be created!") return False diff --git a/harmonicIO/worker/docker_service.py b/harmonicIO/worker/docker_service.py index 56b00d1..7a7d9a7 100644 --- a/harmonicIO/worker/docker_service.py +++ b/harmonicIO/worker/docker_service.py @@ -19,3 +19,7 @@ def get_containers_status(): @staticmethod def get_local_images(): return DockerService.__docker_master.get_local_images() + + @staticmethod + def delete_container(csid): + return DockerService.__docker_master.delete_container(csid) diff --git a/harmonicIO/worker/garbage_collector.py b/harmonicIO/worker/garbage_collector.py new file mode 100644 index 0000000..66f42a0 --- /dev/null +++ b/harmonicIO/worker/garbage_collector.py @@ -0,0 +1,29 @@ +from .docker_service import DockerService +from harmonicIO.general.definition import Definition +from harmonicIO.general.services import SysOut + +from time import sleep +class GarbageCollector(): + + # interval between garbage collections in seconds + gc_run_interval = 300 + + def __init__(self, run_interval=300): + self.gc_run_interval = run_interval + + + def collect_exited_containers(self): + while True: + sleep(self.gc_run_interval) + + exited_containers = [] + current_containers = DockerService.get_containers_status() + for cont in current_containers: + # find exited containers + if cont.get(Definition.Container.Status.get_str_status()) == 'exited': + exited_containers.append(cont.get(Definition.Container.Status.get_str_sid())) + + for sid in exited_containers: + if not DockerService.delete_container(sid): + SysOut.debug_string("Could not delete target container: {}".format(sid)) + \ No newline at end of file diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 6c6e3a1..3bb5978 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -104,7 +104,7 @@ def on_post(self, req, res): result = DockerService.create_container(data[Definition.Container.get_str_con_image_name()]) if result: - res.body = "Okay" + res.body = "{}".format(result) res.content_type = "String" res.status = falcon.HTTP_200 return From 2c03cebefb1f1cd23281cc0679802fe420ee8759 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Wed, 28 Feb 2018 16:00:21 +0000 Subject: [PATCH 45/64] bugfix, consumed response with sid when checking if worker started container --- harmonicIO/master/jobqueue.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index 9af9408..ad654f9 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -36,8 +36,9 @@ def start_job(self, target_worker, job_data): resp = urlopen(worker_url, req_data) # NOTE: might need increase in timeout to allow download of large container images!!! if resp.getcode() == 200: # container was created - SysOut.debug_string(resp.read()) - return resp.read() + sid = str(resp.read(), 'utf-8') + SysOut.debug_string("Received sid from container: " + sid) + return sid return False def job_queuer(self): @@ -46,9 +47,12 @@ def job_queuer(self): target = self.find_available_worker(job_data.get(Definition.Container.get_str_con_image_name())) try: worker_ip = target[0] - if self.start_job(worker_ip, job_data): + sid = self.start_job(worker_ip, job_data) + if sid: job_data['job_status'] = JobStatus.READY + job_data[Definition.Container.Status.get_str_sid()] = sid except: + SysOut.err_string("Response from worker threw exception!") job_data['job_status'] = JobStatus.FAILED finally: LookUpTable.Jobs.update_job(job_data) From 8b274e79eb796657af9d359d0dc0c7daa1c93464 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Thu, 1 Mar 2018 16:18:48 +0000 Subject: [PATCH 46/64] added support to request several containers of same image name --- harmonicIO/master/configuration.json | 2 +- harmonicIO/master/jobqueue.py | 32 +++++++++++++++++----------- harmonicIO/master/rest_service.py | 19 +++++++++++------ harmonicIO/worker/configuration.json | 4 ++-- harmonicIO/worker/rest_service.py | 1 - 5 files changed, 35 insertions(+), 23 deletions(-) diff --git a/harmonicIO/master/configuration.json b/harmonicIO/master/configuration.json index 7bc2373..670da42 100644 --- a/harmonicIO/master/configuration.json +++ b/harmonicIO/master/configuration.json @@ -1,6 +1,6 @@ { "node_name": "PE Master", - "master_addr": "192.168.1.12", + "master_addr": "192.168.1.13", "node_port": 8080, "node_data_port_range": [8090,8090], "std_idle_time": 5 diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index ad654f9..3b901a1 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -44,19 +44,25 @@ def start_job(self, target_worker, job_data): def job_queuer(self): while True: job_data = JobQueue.q.get() - target = self.find_available_worker(job_data.get(Definition.Container.get_str_con_image_name())) - try: - worker_ip = target[0] - sid = self.start_job(worker_ip, job_data) - if sid: - job_data['job_status'] = JobStatus.READY - job_data[Definition.Container.Status.get_str_sid()] = sid - except: - SysOut.err_string("Response from worker threw exception!") - job_data['job_status'] = JobStatus.FAILED - finally: - LookUpTable.Jobs.update_job(job_data) - JobQueue.q.task_done() + num_of_conts = job_data.get('num') + job_sids = [] + for i in range(num_of_conts): + target = self.find_available_worker(job_data.get(Definition.Container.get_str_con_image_name())) + try: + worker_ip = target[0] + sid = self.start_job(worker_ip, job_data) + if sid: + job_sids.append(sid) + if len(job_sids) == num_of_conts: + job_data['job_status'] = JobStatus.READY + job_data[Definition.Container.Status.get_str_sid()] = job_sids + except: + SysOut.err_string("Response from worker threw exception!") + job_data['job_status'] = JobStatus.FAILED + break # break makes it stop trying to create new containers as soon as one fails, is this desireable? + ## NOTE: can get really ugly, need to cleanup containers that started OR let user know how many were started instead?? + LookUpTable.Jobs.update_job(job_data) + JobQueue.q.task_done() class JobQueue(object): q = queue.Queue() diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 9393897..180f51d 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -254,6 +254,12 @@ def on_get(self, req, res): res.status = falcon.HTTP_200 class JobManager(object): + """ + JobManager is about taking requests from clients to set up containers + + Provides a post request to let master allocate containers, and get requests to check the status of this. + + """ def __init__(self): pass @@ -267,21 +273,23 @@ def on_get(self, req, res): format_response_string(res, falcon.HTTP_406, "Command not specified.") return + # user wants to know if containers are ready for provided job ID if req.params['type'] == "poll_job": id = req.params.get('job_id') if not id in LookUpTable.Jobs.verbose(): format_response_string(res, falcon.HTTP_404, "Specified job not available.") return - job = LookUpTable.Jobs.verbose() - if job: - stat = str(job[id].get('job_status')) - format_response_string(res, falcon.HTTP_200, ("Job status: " + stat)) + jobs = LookUpTable.Jobs.verbose() + stat = str(jobs[id].get('job_status')) + format_response_string(res, falcon.HTTP_200, ("Job status: " + stat)) return def on_post(self, req, res): # check token and request type is provided + req_data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of body data if they exist + if not Definition.get_str_token() in req.params: res.body = "Token is required." res.content_type = "String" @@ -295,9 +303,8 @@ def on_post(self, req, res): return # request to create new job - create ID for job, add to lookup table, queue creation of the job - job_params = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of body data if they exist if req.params['type'] == 'new_job': - job = new_job(job_params) # attempt to create new job from provided parameters + job = new_job(req_data) # attempt to create new job from provided parameters if not job: SysOut.err_string("New job could not be added!") format_response_string(res, falcon.HTTP_500, "Could not create job.") diff --git a/harmonicIO/worker/configuration.json b/harmonicIO/worker/configuration.json index cf0d82c..24448d4 100644 --- a/harmonicIO/worker/configuration.json +++ b/harmonicIO/worker/configuration.json @@ -1,9 +1,9 @@ { "node_name": "PE Worker", "node_port": 8081, - "node_internal_addr": "192.168.1.12", + "node_internal_addr": "192.168.1.13", "node_external_addr": "None", - "master_addr": "192.168.1.12", + "master_addr": "192.168.1.13", "master_port": 8080, "node_data_port_range": [9000, 9010], "std_idle_time": 5 diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 3bb5978..a069577 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -92,7 +92,6 @@ def on_post(self, req, res): POST: docker?token=None&command=create """ if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_create(): - # Unpack the posted data raw = req.stream.read(req.content_length or 0) data = json.loads(str(raw, 'utf-8')) # create dict of body data if it exists From 8e7d0b5cc9a4c056f90810b7969af94a8258e10f Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Mon, 5 Mar 2018 16:02:50 +0000 Subject: [PATCH 47/64] changed queue to normal list in LookUpTable.__containers, working on notify master when container exits --- harmonicIO/master/configuration.json | 2 +- harmonicIO/master/meta_table.py | 11 ++++++----- harmonicIO/master/rest_service.py | 3 ++- harmonicIO/worker/configuration.json | 4 ++-- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/harmonicIO/master/configuration.json b/harmonicIO/master/configuration.json index 670da42..eb391d6 100644 --- a/harmonicIO/master/configuration.json +++ b/harmonicIO/master/configuration.json @@ -1,6 +1,6 @@ { "node_name": "PE Master", - "master_addr": "192.168.1.13", + "master_addr": "192.168.1.5", "node_port": 8080, "node_data_port_range": [8090,8090], "std_idle_time": 5 diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 002d080..6a16c17 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -38,8 +38,9 @@ def get_container_object(req): ret[Definition.REST.Batch.get_str_batch_port()] = int(req.params[Definition.REST.Batch.get_str_batch_port()]) ret[Definition.REST.Batch.get_str_batch_status()] = int(req.params[Definition.REST.Batch.get_str_batch_status()]) ret[Definition.Container.get_str_con_image_name()] = req.params[Definition.Container.get_str_con_image_name()].strip() + ret[Definition.Container.Status.get_str_sid()] = req.params[Definition.Container.Status.get_str_sid()] ret[Definition.get_str_last_update()] = Services.get_current_timestamp() - ## TODO: add s_id + return ret @staticmethod @@ -53,17 +54,17 @@ def verbose(): @staticmethod def update_container(dict_input): if dict_input[Definition.Container.get_str_con_image_name()] not in LookUpTable.Containers.__containers: - LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]] = queue.Queue() + LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]] = [] - LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]].put(dict_input) + LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]].append(dict_input) @staticmethod def get_candidate_container(image_name): if image_name not in LookUpTable.Containers.__containers: return None - if len(LookUpTable.Containers.__containers[image_name].queue) > 0: - return LookUpTable.Containers.__containers[image_name].get() + if len(LookUpTable.Containers.__containers[image_name]) > 0: + return LookUpTable.Containers.__containers[image_name].pop() return None diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 180f51d..419687d 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -158,7 +158,8 @@ def on_post(self, req, res): if Definition.REST.Batch.get_str_batch_addr() in req.params and \ Definition.REST.Batch.get_str_batch_port() in req.params and \ Definition.REST.Batch.get_str_batch_status() in req.params and \ - Definition.Container.get_str_con_image_name() in req.params: + Definition.Container.get_str_con_image_name() in req.params and \ + Definition.Container.Status.get_str_sid() in req.params: # Check for data type if req.params[Definition.REST.Batch.get_str_batch_port()].isdigit() and \ diff --git a/harmonicIO/worker/configuration.json b/harmonicIO/worker/configuration.json index 24448d4..058243f 100644 --- a/harmonicIO/worker/configuration.json +++ b/harmonicIO/worker/configuration.json @@ -1,9 +1,9 @@ { "node_name": "PE Worker", "node_port": 8081, - "node_internal_addr": "192.168.1.13", + "node_internal_addr": "192.168.1.5", "node_external_addr": "None", - "master_addr": "192.168.1.13", + "master_addr": "192.168.1.5", "master_port": 8080, "node_data_port_range": [9000, 9010], "std_idle_time": 5 From be4fad08b5c8c2387281bc577ad626038b9638c9 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Mon, 5 Mar 2018 16:50:33 +0000 Subject: [PATCH 48/64] fixed container removal bug --- harmonicIO/master/meta_table.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 6a16c17..511671d 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -45,11 +45,7 @@ def get_container_object(req): @staticmethod def verbose(): - ret = dict() - for key, value in LookUpTable.Containers.__containers.items(): - ret[key] = list(value.queue) - - return ret + return LookUpTable.Containers.__containers @staticmethod def update_container(dict_input): @@ -78,7 +74,9 @@ def del_container(container_name, short_id): # List filter code based on: https://stackoverflow.com/questions/1235618/python-remove-dictionary-from-list # Removes - conts[:] = [con for con in conts if con.get(Definition.Container.Status.get_str_sid) != short_id] + conts[:] = [con for con in conts if con.get(Definition.Container.Status.get_str_sid()) != short_id] + + return True From ad5411d13b91dff4a5897b52edb523b765492bd1 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Tue, 6 Mar 2018 15:19:19 +0000 Subject: [PATCH 49/64] added support for requesting volatile jobs and this is managed when creating containers --- harmonicIO/general/definition.py | 8 ++++++++ harmonicIO/master/meta_table.py | 6 +----- harmonicIO/master/rest_service.py | 10 ++++++---- harmonicIO/worker/configuration.json | 3 ++- harmonicIO/worker/configuration.py | 9 ++++++++- harmonicIO/worker/docker_master.py | 9 +++++---- harmonicIO/worker/docker_service.py | 4 ++-- harmonicIO/worker/rest_service.py | 21 ++++++++++++++------- 8 files changed, 46 insertions(+), 24 deletions(-) diff --git a/harmonicIO/general/definition.py b/harmonicIO/general/definition.py index d887c88..77c0539 100644 --- a/harmonicIO/general/definition.py +++ b/harmonicIO/general/definition.py @@ -92,6 +92,10 @@ def get_str_idle_time(): def get_str_data_port_range(): return "node_data_port_range" + @staticmethod + def get_str_container_idle_timeout(): + return "container_idle_timeout" + @staticmethod def get_str_token(): return "token" @@ -330,3 +334,7 @@ def get_str_std_idle_time(): @staticmethod def get_str_token(): return "HDE_TOKEN" + + @staticmethod + def get_str_idle_timeout(): + return "HDE_IDLE_TIMEOUT" diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 511671d..59bc9ec 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -129,8 +129,7 @@ def new_job(request): new_item['job_status'] = request.get('job_status') new_item[Definition.Container.get_str_con_image_name()] = request.get(Definition.Container.get_str_con_image_name()) new_item['user_token'] = request.get(Definition.get_str_token()) - new_item['time_to_live'] = request.get('ttl') - new_item['start_time'] = request.get('start_time') + new_item['volatile'] = request.get('volatile') LookUpTable.Jobs.__jobs[new_id] = new_item return True @@ -149,9 +148,6 @@ def update_job(request): old_job = LookUpTable.Jobs.__jobs[job_id] old_job['job_status'] = request.get('job_status') - #old_job['user_token'] = request.get(Definition.get_str_token()) # should not be able to change user who requested job? - if 'ttl' in request: - old_job['time_to_live'] = request.get('ttl') return True diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 419687d..3b6a2dd 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -49,13 +49,16 @@ def on_put(self, req, res): if Definition.Docker.get_str_finished() in req.params: # a container is shutting down, update containers + # TODO: add some kind of safety mechanism to really make sure no new requests have been sent to this container before acknowledging removal? if LookUpTable.remove_container( req.params.get(Definition.Container.get_str_con_image_name()), req.params.get(Definition.Container.Status.get_str_sid()) ): format_response_string(res, falcon.HTTP_200, "Container successfully removed") + # NOTE: container will terminate as soon as it reads this response! else: format_response_string(res, falcon.HTTP_400, "Could not remove container from table!") + # NOTE: container will continue as before when it reads this response! return @@ -289,8 +292,9 @@ def on_get(self, req, res): def on_post(self, req, res): # check token and request type is provided - req_data = json.loads(str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of body data if they exist - + req_raw = (str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of body data if they exist + print(req_raw) + req_data = json.loads(req_raw) if not Definition.get_str_token() in req.params: res.body = "Token is required." res.content_type = "String" @@ -358,8 +362,6 @@ def rand_id(N): # add job to table job_params['job_id'] = job_id job_params['job_status'] = JobStatus.INIT - job_params['ttl'] = 30 - job_params['start_time'] = LService.get_current_timestamp() if not LookUpTable.Jobs.new_job(job_params): return None diff --git a/harmonicIO/worker/configuration.json b/harmonicIO/worker/configuration.json index 058243f..50789e9 100644 --- a/harmonicIO/worker/configuration.json +++ b/harmonicIO/worker/configuration.json @@ -6,5 +6,6 @@ "master_addr": "192.168.1.5", "master_port": 8080, "node_data_port_range": [9000, 9010], - "std_idle_time": 5 + "std_idle_time": 5, + "container_idle_timeout": 1 } diff --git a/harmonicIO/worker/configuration.py b/harmonicIO/worker/configuration.py index 2946ddc..62b2304 100644 --- a/harmonicIO/worker/configuration.py +++ b/harmonicIO/worker/configuration.py @@ -12,6 +12,7 @@ class Setting(object): __master_port = None __node_external_addr = None __node_internal_addr = None + __container_idle_timeout = None @staticmethod def set_node_addr(addr=None): @@ -87,6 +88,10 @@ def get_min_worker(): @staticmethod def get_node_external_addr(): return Setting.__node_external_addr + + @staticmethod + def get_container_idle_timeout(): + return Setting.__container_idle_timeout @staticmethod def read_cfg_from_file(): @@ -107,7 +112,8 @@ def read_cfg_from_file(): Definition.get_str_idle_time() in cfg and \ Definition.get_str_master_addr() in cfg and \ Definition.get_str_master_port() in cfg and \ - Definition.get_str_node_external_addr() in cfg and \ + Definition.get_str_container_idle_timeout() in cfg and \ + Definition.get_str_node_internal_addr() in cfg and \ Definition.get_str_node_internal_addr(): # Check port number is int or not if not isinstance(cfg[Definition.get_str_node_port()], int): @@ -137,6 +143,7 @@ def read_cfg_from_file(): Setting.__master_addr = cfg[Definition.get_str_master_addr()].strip() Setting.__master_port = cfg[Definition.get_str_master_port()] Setting.__node_external_addr = cfg[Definition.get_str_node_external_addr()].strip().lower() + Setting.__container_idle_timeout = cfg[Definition.get_str_container_idle_timeout()] * 60 # convert idle time from minute to seconds # Check for auto node name if Setting.__node_name.lower() == "auto": diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 1a4c7a0..9b38540 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -91,12 +91,12 @@ def delete_container(self, cont_shortid): return False - def run_container(self, container_name): + def run_container(self, container_name, volatile): def get_ports_setting(expose, ports): return {str(expose) + '/tcp': ports} - def get_env_setting(expose, a_port): + def get_env_setting(expose, a_port, volatile): ret = dict() ret[Definition.Docker.HDE.get_str_node_name()] = container_name ret[Definition.Docker.HDE.get_str_node_addr()] = Setting.get_node_addr() @@ -106,7 +106,8 @@ def get_env_setting(expose, a_port): ret[Definition.Docker.HDE.get_str_master_port()] = Setting.get_master_port() ret[Definition.Docker.HDE.get_str_std_idle_time()] = Setting.get_std_idle_time() ret[Definition.Docker.HDE.get_str_token()] = Setting.get_token() - + if not volatile: + ret[Definition.Docker.HDE.get_str_idle_timeout()] = Setting.get_container_idle_timeout() return ret port = self.__get_available_port() @@ -122,7 +123,7 @@ def get_env_setting(expose, a_port): stderr=True, stdout=True, ports=get_ports_setting(expose_port, port), - environment=get_env_setting(expose_port, port)) + environment=get_env_setting(expose_port, port, volatile)) import time time.sleep(1) print('..created container, logs:') diff --git a/harmonicIO/worker/docker_service.py b/harmonicIO/worker/docker_service.py index 7a7d9a7..084ae1b 100644 --- a/harmonicIO/worker/docker_service.py +++ b/harmonicIO/worker/docker_service.py @@ -9,8 +9,8 @@ def init(): DockerService.__docker_master = DockerMaster() @staticmethod - def create_container(container_name): - return DockerService.__docker_master.run_container(container_name) + def create_container(container_name, volatile): + return DockerService.__docker_master.run_container(container_name, volatile) @staticmethod def get_containers_status(): diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index a069577..19d97b5 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -7,15 +7,17 @@ # function that sends request to master to notify exiting of a container -def notify_master_container_finished(csid): +def notify_master_container_finished(container, csid): from urllib.request import urlopen, Request - notify_url = "http://{}:{}/{}?token=None&{}={}".format( + notify_url = "http://{}:{}/{}?token=None&{}={}&{}={}".format( Setting.get_master_addr, Setting.get_master_port, Definition.REST.get_str_status(), Definition.Docker.get_str_finished(), - csid + csid, + Definition.Container.get_str_con_image_name, + container ) req = Request(url=notify_url, method='PUT') resp = urlopen(req) @@ -58,9 +60,10 @@ def on_get(self, req, res): if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_finished(): res.content_type = "String" - container_short_id = req.params.get(Definition.Container.Status.get_str_sid) - if container_short_id: - if not notify_master_container_finished(container_short_id): + short_id = req.params.get(Definition.Container.Status.get_str_sid) + name = req.params.get(Definition.Container.get_str_con_image_name) + if short_id and name: + if not notify_master_container_finished(name, short_id): res.body = "Could not find requested container running." res.status = falcon.HTTP_404 else: @@ -100,7 +103,11 @@ def on_post(self, req, res): res.content_type = "String" res.status = falcon.HTTP_401 - result = DockerService.create_container(data[Definition.Container.get_str_con_image_name()]) + volatile = False + if data.get('volatile'): + volatile = True # only set to true if user has actually provided the 'volatile' : true data in request + + result = DockerService.create_container(data[Definition.Container.get_str_con_image_name()], volatile) if result: res.body = "{}".format(result) From c9a218f9f16f5eb669fff215baa5e16c8b1b339a Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Tue, 6 Mar 2018 17:34:09 +0000 Subject: [PATCH 50/64] bugfixes, setting up todo's --- harmonicIO/master/meta_table.py | 1 + harmonicIO/master/rest_service.py | 2 +- harmonicIO/worker/docker_master.py | 2 +- harmonicIO/worker/rest_service.py | 11 +++++------ 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 59bc9ec..38f913f 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -52,6 +52,7 @@ def update_container(dict_input): if dict_input[Definition.Container.get_str_con_image_name()] not in LookUpTable.Containers.__containers: LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]] = [] + ## TODO: make sure no duplicate of existing container in list of containers is added LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]].append(dict_input) @staticmethod diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 3b6a2dd..9977fe8 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -52,7 +52,7 @@ def on_put(self, req, res): # TODO: add some kind of safety mechanism to really make sure no new requests have been sent to this container before acknowledging removal? if LookUpTable.remove_container( req.params.get(Definition.Container.get_str_con_image_name()), - req.params.get(Definition.Container.Status.get_str_sid()) + req.params.get(Definition.Docker.get_str_finished()) ): format_response_string(res, falcon.HTTP_200, "Container successfully removed") # NOTE: container will terminate as soon as it reads this response! diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 9b38540..6c2df03 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -106,7 +106,7 @@ def get_env_setting(expose, a_port, volatile): ret[Definition.Docker.HDE.get_str_master_port()] = Setting.get_master_port() ret[Definition.Docker.HDE.get_str_std_idle_time()] = Setting.get_std_idle_time() ret[Definition.Docker.HDE.get_str_token()] = Setting.get_token() - if not volatile: + if volatile: ret[Definition.Docker.HDE.get_str_idle_timeout()] = Setting.get_container_idle_timeout() return ret diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 19d97b5..813e8d8 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -11,12 +11,12 @@ def notify_master_container_finished(container, csid): from urllib.request import urlopen, Request notify_url = "http://{}:{}/{}?token=None&{}={}&{}={}".format( - Setting.get_master_addr, - Setting.get_master_port, + Setting.get_master_addr(), + Setting.get_master_port(), Definition.REST.get_str_status(), Definition.Docker.get_str_finished(), csid, - Definition.Container.get_str_con_image_name, + Definition.Container.get_str_con_image_name(), container ) req = Request(url=notify_url, method='PUT') @@ -59,9 +59,8 @@ def on_get(self, req, res): # Container is exiting, notify master to update if req.params[Definition.Docker.get_str_command()] == Definition.Docker.get_str_finished(): res.content_type = "String" - - short_id = req.params.get(Definition.Container.Status.get_str_sid) - name = req.params.get(Definition.Container.get_str_con_image_name) + short_id = req.params.get(Definition.Container.Status.get_str_sid()) + name = req.params.get(Definition.Container.get_str_con_image_name()) if short_id and name: if not notify_master_container_finished(name, short_id): res.body = "Could not find requested container running." From 4cf31ba83873752892f01f03220640cf9cefe6a8 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Wed, 7 Mar 2018 12:11:51 +0000 Subject: [PATCH 51/64] made stream connector use daemon testing container, fixed bug in lookuptable container update --- harmonicIO/master/meta_table.py | 4 ++-- harmonicIO/stream_connector/__main__.py | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 38f913f..c76b2c3 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -52,8 +52,8 @@ def update_container(dict_input): if dict_input[Definition.Container.get_str_con_image_name()] not in LookUpTable.Containers.__containers: LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]] = [] - ## TODO: make sure no duplicate of existing container in list of containers is added - LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]].append(dict_input) + if not dict_input in LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]]: + LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]].append(dict_input) @staticmethod def get_candidate_container(image_name): diff --git a/harmonicIO/stream_connector/__main__.py b/harmonicIO/stream_connector/__main__.py index 8e4e5a1..d9be424 100755 --- a/harmonicIO/stream_connector/__main__.py +++ b/harmonicIO/stream_connector/__main__.py @@ -4,13 +4,12 @@ # Example program # The use case number can be defined by varying the number in use case variable MASTER_DATA = { - "MASTER_ADDR": "192.168.0.137", + "MASTER_ADDR": "192.168.1.5", "MASTER_PORT": 8080 } PROCC_DATA = { - "batch_hist": "beirbear/test:batch_hist", - "batch_sum": "beirbear/test:batch_sum", + "daemon_test": "snapple49/hio-daemondev:test", "OS": "ubuntu" } @@ -38,15 +37,14 @@ def read_data_from_file(path): # Define data to test d_list = { - 'batch_hist': read_data_from_file('stream_connector/lena512.bmp'), - 'batch_sum': read_data_from_file('stream_connector/str_array.txt') + 'daemon_test': read_data_from_file('harmonicIO/stream_connector/lena512.bmp') } # Generate a sample stream order stream_order = [0] * ITEM_NUMBER import random for i in range(ITEM_NUMBER): - stream_order[i] = (i, 'batch_sum' if (random.randrange(1, 100) % len(d_list)) == 0 else 'batch_hist') + stream_order[i] = (i, 'daemon_test' if (random.randrange(1, 100) % len(d_list)) == 0 else 'daemon_test') return stream_order, d_list From ac3655bd5d3d8e8b2a1d87a2e351606e36f08c58 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Wed, 7 Mar 2018 16:04:29 +0000 Subject: [PATCH 52/64] added naive master-based upscaling depending on msg queue length --- harmonicIO/master/__main__.py | 20 +++++++++++++------- harmonicIO/master/jobqueue.py | 31 ++++++++++++++++++++++++++++--- harmonicIO/master/meta_table.py | 2 +- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/harmonicIO/master/__main__.py b/harmonicIO/master/__main__.py index 1e73464..e66477c 100644 --- a/harmonicIO/master/__main__.py +++ b/harmonicIO/master/__main__.py @@ -5,16 +5,20 @@ Master entry point """ -def run_queue_manager(): +def run_queue_manager(manager): """ Run job queue manager thread can be several managers to manage large amount of queued jobs """ import threading - manager = JobManager() - manager_thread = threading.Thread(target=manager.job_queuer) - manager_thread.daemon = True - manager_thread.start() + for i in range(manager.queuer_threads): + manager_thread = threading.Thread(target=manager.job_queuer) + manager_thread.daemon = True + manager_thread.start() + + supervisor_thread = threading.Thread(target=manager.queue_supervisor) + supervisor_thread.daemon = True + supervisor_thread.start() SysOut.out_string("Job queue started") @@ -77,6 +81,8 @@ def run_msg_service(): # Binding commander to the rest service and enable REST service pool.submit(run_rest_service) - + + # create a job manager which is a queue manager supervising the creation of containers, both via user and auto-scaling + jobManager = JobManager(30, 100, 5, 1) # Run job queue manager thread - pool.submit(run_queue_manager) + pool.submit(run_queue_manager, jobManager) diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index 3b901a1..c9b4796 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -4,8 +4,17 @@ from .meta_table import LookUpTable from harmonicIO.general.definition import Definition, JobStatus from harmonicIO.general.services import SysOut +import time +from .messaging_system import MessagesQueue -class JobManager(): +class JobManager: + + def __init__(self, interval, threshold, increment, queuers): + self.__supervisor_interval = interval + self.__supervisor_increment = increment + self.__supervisor_threshold = threshold + self.queuer_threads = queuers + def find_available_worker(self, container): candidates = [] @@ -60,11 +69,27 @@ def job_queuer(self): SysOut.err_string("Response from worker threw exception!") job_data['job_status'] = JobStatus.FAILED break # break makes it stop trying to create new containers as soon as one fails, is this desireable? - ## NOTE: can get really ugly, need to cleanup containers that started OR let user know how many were started instead?? + + ## NOTE: can get really ugly, need to cleanup containers that started (rollback) OR let user know how many were started instead?? or retry failed ones? LookUpTable.Jobs.update_job(job_data) JobQueue.q.task_done() -class JobQueue(object): + def queue_supervisor(self): + while True: + time.sleep(self.__supervisor_interval) ## NOTE: this is probably a very tuneable parameter for later + msg_queue = MessagesQueue.verbose() + for container in msg_queue: + if int(msg_queue[container]) > self.__supervisor_threshold: + job_data = { + Definition.Container.get_str_con_image_name() : container, + 'num' : self.__supervisor_increment, + 'volatile' : True + } + JobQueue.queue_new_job(job_data) + + + +class JobQueue: q = queue.Queue() @staticmethod diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index c76b2c3..fe28aaf 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -74,7 +74,7 @@ def del_container(container_name, short_id): # conts is list of containers with same c_name # List filter code based on: https://stackoverflow.com/questions/1235618/python-remove-dictionary-from-list - # Removes + # Removes item with specified short_id from list conts[:] = [con for con in conts if con.get(Definition.Container.Status.get_str_sid()) != short_id] return True From 880209c1a5d3b0fdb5866b672f1014f5c752e605 Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Mon, 12 Mar 2018 17:03:27 +0100 Subject: [PATCH 53/64] added comment describing parameters in master, temporary fix --- harmonicIO/master/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmonicIO/master/__main__.py b/harmonicIO/master/__main__.py index e66477c..282a94e 100644 --- a/harmonicIO/master/__main__.py +++ b/harmonicIO/master/__main__.py @@ -83,6 +83,6 @@ def run_msg_service(): pool.submit(run_rest_service) # create a job manager which is a queue manager supervising the creation of containers, both via user and auto-scaling - jobManager = JobManager(30, 100, 5, 1) + jobManager = JobManager(30, 100, 5, 1) # 30 seconds interval between checking, 100 requests in queue before increase, add 5 new containers, 1 thread for queue supervisor # Run job queue manager thread pool.submit(run_queue_manager, jobManager) From afe565ccb0aa1e3578c7a9820f83e2dc74ad18f6 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Thu, 15 Mar 2018 17:50:35 +0000 Subject: [PATCH 54/64] made worker port in master rest call follow config, working on container port issues --- harmonicIO/general/services.py | 1 + harmonicIO/master/jobqueue.py | 13 ++++++------- harmonicIO/master/meta_table.py | 12 ++++++++---- harmonicIO/master/rest_service.py | 1 - harmonicIO/worker/docker_master.py | 13 +++++++++++++ harmonicIO/worker/rest_service.py | 17 +++++++++++------ 6 files changed, 39 insertions(+), 18 deletions(-) diff --git a/harmonicIO/general/services.py b/harmonicIO/general/services.py index c130b6c..1a9fb15 100644 --- a/harmonicIO/general/services.py +++ b/harmonicIO/general/services.py @@ -81,6 +81,7 @@ def get_machine_status(setting, role): body[Definition.get_str_node_name()] = setting.get_node_name() body[Definition.get_str_node_role()] = role body[Definition.get_str_node_addr()] = setting.get_node_addr() + body[Definition.get_str_node_port()] = setting.get_node_port() body[Definition.get_str_load1()] = load1 body[Definition.get_str_load5()] = load5 body[Definition.get_str_load15()] = load15 diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index c9b4796..32150df 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -27,9 +27,9 @@ def find_available_worker(self, container): for worker in workers: curr_worker = workers[worker] if container in curr_worker[Definition.REST.get_str_local_imgs()]: - candidates.append((curr_worker[Definition.get_str_node_addr()], curr_worker[Definition.get_str_load5()], True)) + candidates.append(((curr_worker[Definition.get_str_node_addr()], curr_worker[Definition.get_str_node_port()]), curr_worker[Definition.get_str_load5()], True)) else: - candidates.append((curr_worker[Definition.get_str_node_addr()], curr_worker[Definition.get_str_load5()], False)) + candidates.append(((curr_worker[Definition.get_str_node_addr()], curr_worker[Definition.get_str_node_port()]), curr_worker[Definition.get_str_load5()], False)) candidates.sort(key=lambda x: (-x[2], x[1])) # sort candidate workers first on availability of image, then on load (avg load last 5 mins) for candidate in candidates: @@ -38,9 +38,9 @@ def find_available_worker(self, container): return None - def start_job(self, target_worker, job_data): + def start_job(self, target, job_data): # send request to worker - worker_url = "http://{}:8081/docker?token=None&command=create".format(target_worker) + worker_url = "http://{}:{}/docker?token=None&command=create".format(target[0], target[1]) req_data = bytes(json.dumps(job_data), 'utf-8') resp = urlopen(worker_url, req_data) # NOTE: might need increase in timeout to allow download of large container images!!! @@ -56,10 +56,9 @@ def job_queuer(self): num_of_conts = job_data.get('num') job_sids = [] for i in range(num_of_conts): - target = self.find_available_worker(job_data.get(Definition.Container.get_str_con_image_name())) + target = self.find_available_worker(job_data.get(Definition.Container.get_str_con_image_name()))[0] try: - worker_ip = target[0] - sid = self.start_job(worker_ip, job_data) + sid = self.start_job(target, job_data) if sid: job_sids.append(sid) if len(job_sids) == num_of_conts: diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index fe28aaf..20d9333 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -39,7 +39,6 @@ def get_container_object(req): ret[Definition.REST.Batch.get_str_batch_status()] = int(req.params[Definition.REST.Batch.get_str_batch_status()]) ret[Definition.Container.get_str_con_image_name()] = req.params[Definition.Container.get_str_con_image_name()].strip() ret[Definition.Container.Status.get_str_sid()] = req.params[Definition.Container.Status.get_str_sid()] - ret[Definition.get_str_last_update()] = Services.get_current_timestamp() return ret @@ -51,9 +50,14 @@ def verbose(): def update_container(dict_input): if dict_input[Definition.Container.get_str_con_image_name()] not in LookUpTable.Containers.__containers: LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]] = [] - - if not dict_input in LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]]: - LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]].append(dict_input) + + # TODO: not quite done here + for cont in LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]]: + cont.pop(Definition.get_str_last_update(), None) + if not dict_input == cont: + dict_input[Definition.get_str_last_update()] = Services.get_current_timestamp() + LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]].append(dict_input) + cont[Definition.get_str_last_update()] = Services.get_current_timestamp() @staticmethod def get_candidate_container(image_name): diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index 9977fe8..c1744e2 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -293,7 +293,6 @@ def on_get(self, req, res): def on_post(self, req, res): # check token and request type is provided req_raw = (str(req.stream.read(req.content_length or 0), 'utf-8')) # create dict of body data if they exist - print(req_raw) req_data = json.loads(req_raw) if not Definition.get_str_token() in req.params: res.body = "Token is required." diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 6c2df03..8634be8 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -55,6 +55,13 @@ def __get_available_port(self): return None + def __update_ports(self): + for port in self.__ports: + if port.is_port_open(): + port.status = CStatus.BUSY + else: + port.status = CStatus.AVAILABLE + def get_containers_status(self): def get_container_status(input): @@ -109,6 +116,12 @@ def get_env_setting(expose, a_port, volatile): if volatile: ret[Definition.Docker.HDE.get_str_idle_timeout()] = Setting.get_container_idle_timeout() return ret + + + for port in self.__ports: + SysOut.debug_string(str(port.port) + ": " + str(port.is_port_open())) + + self.__update_ports() port = self.__get_available_port() expose_port = 80 diff --git a/harmonicIO/worker/rest_service.py b/harmonicIO/worker/rest_service.py index 813e8d8..17910fa 100644 --- a/harmonicIO/worker/rest_service.py +++ b/harmonicIO/worker/rest_service.py @@ -9,6 +9,7 @@ # function that sends request to master to notify exiting of a container def notify_master_container_finished(container, csid): from urllib.request import urlopen, Request + from urllib.error import HTTPError notify_url = "http://{}:{}/{}?token=None&{}={}&{}={}".format( Setting.get_master_addr(), @@ -19,13 +20,17 @@ def notify_master_container_finished(container, csid): Definition.Container.get_str_con_image_name(), container ) - req = Request(url=notify_url, method='PUT') - resp = urlopen(req) + try: + req = Request(url=notify_url, method='PUT') + resp = urlopen(req) - if resp.getcode() == 200: - # container was removed on master - return True - return False + if resp.getcode() == 200: + # container was removed on master + return True + except HTTPError as e: + SysOut.err_string(e.msg) + return False + class ContainerService(object): From 23d4fcbdf2276f2ad4f0b0ae93221c5676caa52c Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Fri, 16 Mar 2018 16:47:52 +0000 Subject: [PATCH 55/64] ports issue fixed(?), now job queuer tries to send requests to all available workers if one runs out of ports --- harmonicIO/master/jobqueue.py | 41 ++++++++++++++++++++++-------- harmonicIO/master/meta_table.py | 25 +++++++++++------- harmonicIO/worker/docker_master.py | 4 --- 3 files changed, 46 insertions(+), 24 deletions(-) diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index 32150df..30e8531 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -19,12 +19,13 @@ def __init__(self, interval, threshold, increment, queuers): def find_available_worker(self, container): candidates = [] workers = LookUpTable.Workers.verbose() - + SysOut.debug_string("Found workers: " + str(workers)) if not workers: return None # loop through workers and make tuples of worker IP, load and if requested container is available locally for worker in workers: + curr_worker = workers[worker] if container in curr_worker[Definition.REST.get_str_local_imgs()]: candidates.append(((curr_worker[Definition.get_str_node_addr()], curr_worker[Definition.get_str_node_port()]), curr_worker[Definition.get_str_load5()], True)) @@ -32,11 +33,11 @@ def find_available_worker(self, container): candidates.append(((curr_worker[Definition.get_str_node_addr()], curr_worker[Definition.get_str_node_port()]), curr_worker[Definition.get_str_load5()], False)) candidates.sort(key=lambda x: (-x[2], x[1])) # sort candidate workers first on availability of image, then on load (avg load last 5 mins) - for candidate in candidates: - if float(candidate[1]) < 0.5: - return candidate + for candidate in list(candidates): + if not float(candidate[1]) < 0.5: + candidates.remove(candidate) # remove candidates with higher than 50% cpu load - return None + return candidates def start_job(self, target, job_data): # send request to worker @@ -55,19 +56,37 @@ def job_queuer(self): job_data = JobQueue.q.get() num_of_conts = job_data.get('num') job_sids = [] - for i in range(num_of_conts): - target = self.find_available_worker(job_data.get(Definition.Container.get_str_con_image_name()))[0] + targets = self.find_available_worker(job_data.get(Definition.Container.get_str_con_image_name())) + SysOut.debug_string("Candidate workers: " + str(targets)) + n = 0 + while len(job_sids) < num_of_conts: + target = targets[n][0] + SysOut.debug_string("Attempting to send request to worker: " + str(target)) try: sid = self.start_job(target, job_data) if sid: job_sids.append(sid) + else: # not sure how urllib handles a 400 response, but this needs to happen either in case of exception or sid = False + if n < len(targets)-1: # other candidates are available + n+= 1 + continue + else: + job_data['job_status'] = JobStatus.FAILED + break + if len(job_sids) == num_of_conts: job_data['job_status'] = JobStatus.READY - job_data[Definition.Container.Status.get_str_sid()] = job_sids + job_data[Definition.Container.Status.get_str_sid()] = job_sids #TODO: add this in metatable + except: - SysOut.err_string("Response from worker threw exception!") - job_data['job_status'] = JobStatus.FAILED - break # break makes it stop trying to create new containers as soon as one fails, is this desireable? + SysOut.debug_string("Response from worker threw exception!") + if n < len(targets)-1: # other candidates are available + SysOut.usr_string("We got to other candidates available!!!!!!! -------------------------------------") + n+= 1 + continue + else: + job_data['job_status'] = JobStatus.FAILED + break # break makes it stop trying to create new containers as soon as one fails, is this desireable? Probaby as now it is unlikely that there is any hosting capability ## NOTE: can get really ugly, need to cleanup containers that started (rollback) OR let user know how many were started instead?? or retry failed ones? LookUpTable.Jobs.update_job(job_data) diff --git a/harmonicIO/master/meta_table.py b/harmonicIO/master/meta_table.py index 20d9333..cac7b6b 100644 --- a/harmonicIO/master/meta_table.py +++ b/harmonicIO/master/meta_table.py @@ -48,16 +48,23 @@ def verbose(): @staticmethod def update_container(dict_input): - if dict_input[Definition.Container.get_str_con_image_name()] not in LookUpTable.Containers.__containers: - LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]] = [] - - # TODO: not quite done here - for cont in LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]]: - cont.pop(Definition.get_str_last_update(), None) - if not dict_input == cont: - dict_input[Definition.get_str_last_update()] = Services.get_current_timestamp() + + def cont_in_table(dict_input): + conts = LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]] + for cont in conts: + if dict_input.get(Definition.Container.Status.get_str_sid()) == cont.get(Definition.Container.Status.get_str_sid()): + return cont + return None + + if dict_input[Definition.Container.get_str_con_image_name()] not in LookUpTable.Containers.__containers: # no containers for this image exist + new_cont = [dict_input] + LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]] = new_cont + else: + cont = cont_in_table(dict_input) + if not cont: # this specific container is not already in table LookUpTable.Containers.__containers[dict_input[Definition.Container.get_str_con_image_name()]].append(dict_input) - cont[Definition.get_str_last_update()] = Services.get_current_timestamp() + else: # container was already in table, update timestamp + cont[Definition.get_str_last_update()] = Services.get_current_timestamp() @staticmethod def get_candidate_container(image_name): diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 8634be8..7a93bb6 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -116,10 +116,6 @@ def get_env_setting(expose, a_port, volatile): if volatile: ret[Definition.Docker.HDE.get_str_idle_timeout()] = Setting.get_container_idle_timeout() return ret - - - for port in self.__ports: - SysOut.debug_string(str(port.port) + ": " + str(port.is_port_open())) self.__update_ports() From 739936d4dc188430766441180279ecdc263854a9 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Mon, 26 Mar 2018 14:11:22 +0000 Subject: [PATCH 56/64] added json output option to master status verbose --- harmonicIO/master/rest_service.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/harmonicIO/master/rest_service.py b/harmonicIO/master/rest_service.py index c1744e2..9c5d744 100644 --- a/harmonicIO/master/rest_service.py +++ b/harmonicIO/master/rest_service.py @@ -244,6 +244,8 @@ def on_get(self, req, res): if req.params[Definition.MessagesQueue.get_str_command()] == "verbose": data = LookUpTable.verbose() data['MSG'] = MessagesQueue.verbose() + if req.params.get('format') == 'JSON': + data = json.dumps(data) res.body = str(data) res.content_type = "String" From 950cc6e4282e4aa13d9ad9ccfb3b1d57f5ee8ef4 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Mon, 26 Mar 2018 14:11:58 +0000 Subject: [PATCH 57/64] added worker node port for rest service to container HDE data --- harmonicIO/general/definition.py | 4 ++++ harmonicIO/master/configuration.json | 2 +- harmonicIO/worker/docker_master.py | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/harmonicIO/general/definition.py b/harmonicIO/general/definition.py index 77c0539..d011046 100644 --- a/harmonicIO/general/definition.py +++ b/harmonicIO/general/definition.py @@ -311,6 +311,10 @@ def get_str_node_name(): def get_str_node_addr(): return "HDE_NODE_ADDR" + @staticmethod + def get_str_node_rest_port(): + return "HDE_NODE_REST_PORT" + @staticmethod def get_str_node_data_port(): return "HDE_NODE_DATA_PORT" diff --git a/harmonicIO/master/configuration.json b/harmonicIO/master/configuration.json index eb391d6..f1fe2d8 100644 --- a/harmonicIO/master/configuration.json +++ b/harmonicIO/master/configuration.json @@ -1,6 +1,6 @@ { "node_name": "PE Master", - "master_addr": "192.168.1.5", + "master_addr": "192.168.1.17", "node_port": 8080, "node_data_port_range": [8090,8090], "std_idle_time": 5 diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 7a93bb6..0020cf8 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -107,6 +107,7 @@ def get_env_setting(expose, a_port, volatile): ret = dict() ret[Definition.Docker.HDE.get_str_node_name()] = container_name ret[Definition.Docker.HDE.get_str_node_addr()] = Setting.get_node_addr() + ret[Definition.Docker.HDE.get_str_node_rest_port()] = Setting.get_node_port() ret[Definition.Docker.HDE.get_str_node_data_port()] = expose ret[Definition.Docker.HDE.get_str_node_forward_port()] = a_port ret[Definition.Docker.HDE.get_str_master_addr()] = Setting.get_master_addr() From 1f09af017c4a6e00fbb1dd39a5512ad3f527b6ba Mon Sep 17 00:00:00 2001 From: Snapple49 Date: Tue, 3 Apr 2018 14:34:25 +0200 Subject: [PATCH 58/64] added default value for volatility --- harmonicIO/worker/docker_master.py | 2 +- harmonicIO/worker/docker_service.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/harmonicIO/worker/docker_master.py b/harmonicIO/worker/docker_master.py index 0020cf8..ab79694 100644 --- a/harmonicIO/worker/docker_master.py +++ b/harmonicIO/worker/docker_master.py @@ -98,7 +98,7 @@ def delete_container(self, cont_shortid): return False - def run_container(self, container_name, volatile): + def run_container(self, container_name, volatile=False): def get_ports_setting(expose, ports): return {str(expose) + '/tcp': ports} diff --git a/harmonicIO/worker/docker_service.py b/harmonicIO/worker/docker_service.py index 084ae1b..0fc7098 100644 --- a/harmonicIO/worker/docker_service.py +++ b/harmonicIO/worker/docker_service.py @@ -9,7 +9,7 @@ def init(): DockerService.__docker_master = DockerMaster() @staticmethod - def create_container(container_name, volatile): + def create_container(container_name, volatile=False): return DockerService.__docker_master.run_container(container_name, volatile) @staticmethod From c641e60b6d1d669d87c2a4be3744780d09f0d99a Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Tue, 3 Apr 2018 14:43:31 +0200 Subject: [PATCH 59/64] Update Readme.md --- Readme.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Readme.md b/Readme.md index 81ce59d..303a865 100644 --- a/Readme.md +++ b/Readme.md @@ -36,6 +36,22 @@ $ sudo ./runWorker.sh * Start an (example) processing container on the worker (localhost) node (replacing ``): + --- Update from Oliver: + +hosting containers: +``` +"curl -X POST "http://:/jobRequest?token=None&type=new_job" --data '{"c_name" : , "num" : , "volatile" : }'" +``` +NOTE: spelling is important, `true`=volatile container, `false`=involatile container. responds with an ID of the container creation job + +polling status of container request: +``` +"curl http://:/jobRequest?token=None&type=poll_job&job_id=" +``` +, checks status of the container hosting job with provided ID, READY means all contaiers are started and running, INITIALIZING means not all have started yet, FAILED means not all could be started but some may still be available + + --- + We use the example container `benblamey/hio-example:latest`, which can be built from https://github.com/HASTE-project/HarmonicPE ``` $ curl -X POST "http://:8081/docker?token=None&command=create" --data '{"c_name" : "benblamey/hio-example:latest", "num" : 1}' From 5f7091ec7094f50e2797560954154da9628180af Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Tue, 3 Apr 2018 14:45:18 +0200 Subject: [PATCH 60/64] Update Readme.md Put my new instructions above Ben's old instructions --- Readme.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Readme.md b/Readme.md index 303a865..a368b30 100644 --- a/Readme.md +++ b/Readme.md @@ -34,9 +34,8 @@ $ sudo ./runMaster.sh $ sudo ./runWorker.sh ``` -* Start an (example) processing container on the worker (localhost) node (replacing ``): - --- Update from Oliver: +* Update from Oliver: hosting containers: ``` @@ -50,7 +49,8 @@ polling status of container request: ``` , checks status of the container hosting job with provided ID, READY means all contaiers are started and running, INITIALIZING means not all have started yet, FAILED means not all could be started but some may still be available - --- + +* Start an (example) processing container on the worker (localhost) node (replacing ``): We use the example container `benblamey/hio-example:latest`, which can be built from https://github.com/HASTE-project/HarmonicPE ``` From 1843597fe103ebdf3200c70031f45a3cddf76a7e Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Wed, 4 Apr 2018 14:10:48 +0200 Subject: [PATCH 61/64] added option to enable/disable autoscaling in master config file --- harmonicIO/master/__main__.py | 12 ++++++++---- harmonicIO/master/configuration.json | 3 ++- harmonicIO/master/configuration.py | 6 ++++++ harmonicIO/master/jobqueue.py | 3 +++ 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/harmonicIO/master/__main__.py b/harmonicIO/master/__main__.py index 282a94e..e1e25ba 100644 --- a/harmonicIO/master/__main__.py +++ b/harmonicIO/master/__main__.py @@ -16,12 +16,15 @@ def run_queue_manager(manager): manager_thread.daemon = True manager_thread.start() - supervisor_thread = threading.Thread(target=manager.queue_supervisor) - supervisor_thread.daemon = True - supervisor_thread.start() - SysOut.out_string("Job queue started") + if Setting.get_autoscaling(): + supervisor_thread = threading.Thread(target=manager.queue_supervisor) + supervisor_thread.daemon = True + supervisor_thread.start() + SysOut.out_string("Autoscaling supervisor started") + + def run_rest_service(): """ @@ -84,5 +87,6 @@ def run_msg_service(): # create a job manager which is a queue manager supervising the creation of containers, both via user and auto-scaling jobManager = JobManager(30, 100, 5, 1) # 30 seconds interval between checking, 100 requests in queue before increase, add 5 new containers, 1 thread for queue supervisor + # Run job queue manager thread pool.submit(run_queue_manager, jobManager) diff --git a/harmonicIO/master/configuration.json b/harmonicIO/master/configuration.json index f1fe2d8..746dcdc 100644 --- a/harmonicIO/master/configuration.json +++ b/harmonicIO/master/configuration.json @@ -3,5 +3,6 @@ "master_addr": "192.168.1.17", "node_port": 8080, "node_data_port_range": [8090,8090], - "std_idle_time": 5 + "std_idle_time": 5, + "auto_scaling_enabled" : false } diff --git a/harmonicIO/master/configuration.py b/harmonicIO/master/configuration.py index 4096761..e27625a 100644 --- a/harmonicIO/master/configuration.py +++ b/harmonicIO/master/configuration.py @@ -9,6 +9,7 @@ class Setting(object): __node_data_port_stop = None __std_idle_time = None __token = "None" + __autoscaling = None @staticmethod def set_node_addr(addr=None): @@ -58,6 +59,10 @@ def get_std_idle_time(): def get_token(): return Setting.__token + @staticmethod + def get_autoscaling(): + return Setting.__autoscaling + @staticmethod def read_cfg_from_file(): from harmonicIO.general.services import Services, SysOut @@ -99,6 +104,7 @@ def read_cfg_from_file(): Setting.__node_data_port_start = cfg[Definition.get_str_data_port_range()][0] Setting.__node_data_port_stop = cfg[Definition.get_str_data_port_range()][1] Setting.__std_idle_time = cfg[Definition.get_str_idle_time()] + Setting.__autoscaling = cfg.get('auto_scaling_enabled') SysOut.out_string("Load setting successful.") try: diff --git a/harmonicIO/master/jobqueue.py b/harmonicIO/master/jobqueue.py index 30e8531..c6e7730 100644 --- a/harmonicIO/master/jobqueue.py +++ b/harmonicIO/master/jobqueue.py @@ -93,6 +93,9 @@ def job_queuer(self): JobQueue.q.task_done() def queue_supervisor(self): + """ + Thread that handles autoscaling + """ while True: time.sleep(self.__supervisor_interval) ## NOTE: this is probably a very tuneable parameter for later msg_queue = MessagesQueue.verbose() From 806d0c8716f6a4ab3d378f5f9f98bef06d4c2101 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Wed, 4 Apr 2018 14:17:29 +0200 Subject: [PATCH 62/64] fixed typo and minutes to seconds conversion according to PR feedback --- harmonicIO/worker/__main__.py | 4 ++-- harmonicIO/worker/configuration.json | 2 +- harmonicIO/worker/configuration.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/harmonicIO/worker/__main__.py b/harmonicIO/worker/__main__.py index 5cb2bcf..ae8b6f0 100644 --- a/harmonicIO/worker/__main__.py +++ b/harmonicIO/worker/__main__.py @@ -20,8 +20,8 @@ def run_rest_service(): def start_gc_thread(): - carbage_collector = GarbageCollector(10) - gc_thread = threading.Thread(carbage_collector.collect_exited_containers()) + garbage_collector = GarbageCollector(10) + gc_thread = threading.Thread(garbage_collector.collect_exited_containers()) gc_thread.daemon = True gc_thread.start() diff --git a/harmonicIO/worker/configuration.json b/harmonicIO/worker/configuration.json index 50789e9..81a1405 100644 --- a/harmonicIO/worker/configuration.json +++ b/harmonicIO/worker/configuration.json @@ -7,5 +7,5 @@ "master_port": 8080, "node_data_port_range": [9000, 9010], "std_idle_time": 5, - "container_idle_timeout": 1 + "container_idle_timeout": 60 } diff --git a/harmonicIO/worker/configuration.py b/harmonicIO/worker/configuration.py index 62b2304..f434122 100644 --- a/harmonicIO/worker/configuration.py +++ b/harmonicIO/worker/configuration.py @@ -143,7 +143,7 @@ def read_cfg_from_file(): Setting.__master_addr = cfg[Definition.get_str_master_addr()].strip() Setting.__master_port = cfg[Definition.get_str_master_port()] Setting.__node_external_addr = cfg[Definition.get_str_node_external_addr()].strip().lower() - Setting.__container_idle_timeout = cfg[Definition.get_str_container_idle_timeout()] * 60 # convert idle time from minute to seconds + Setting.__container_idle_timeout = cfg[Definition.get_str_container_idle_timeout()] # Check for auto node name if Setting.__node_name.lower() == "auto": From e5ed841556cb64a7fa32cec3bb0c608e865def4a Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Wed, 4 Apr 2018 14:41:58 +0200 Subject: [PATCH 63/64] Update Readme.md Cleared up my additions a bit, added note about autoscaling enable/disable --- Readme.md | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/Readme.md b/Readme.md index a368b30..5739acc 100644 --- a/Readme.md +++ b/Readme.md @@ -10,6 +10,26 @@ Stream_Connector - client for sending tasks for distributed execution. Forked from https://github.com/beirbear/HarmonicIO +## Update from Oliver: +* Autoscaling: + +An important feature added is auto-scaling, but to not break production it can be disabled. To enable/disable, set the field "auto_scaling_enabled" to true/false in the master's configuration.json file + +* Hosting containers: +``` +curl -X POST "http://:/jobRequest?token=None&type=new_job" --data '{"c_name" : , "num" : , "volatile" : }' +``` +NOTE: spelling is important, `true`=volatile container, `false`=involatile container. responds with an ID of the container creation job + +* Polling status of container request: +``` +curl http://:/jobRequest?token=None&type=poll_job&job_id= +``` +, checks status of the container hosting job with provided ID, READY means all contaiers are started and running, INITIALIZING means not all have started yet, FAILED means not all could be started but some may still be available + +* Stream connector + +Use just as before ## Quickstart @@ -34,22 +54,6 @@ $ sudo ./runMaster.sh $ sudo ./runWorker.sh ``` - -* Update from Oliver: - -hosting containers: -``` -"curl -X POST "http://:/jobRequest?token=None&type=new_job" --data '{"c_name" : , "num" : , "volatile" : }'" -``` -NOTE: spelling is important, `true`=volatile container, `false`=involatile container. responds with an ID of the container creation job - -polling status of container request: -``` -"curl http://:/jobRequest?token=None&type=poll_job&job_id=" -``` -, checks status of the container hosting job with provided ID, READY means all contaiers are started and running, INITIALIZING means not all have started yet, FAILED means not all could be started but some may still be available - - * Start an (example) processing container on the worker (localhost) node (replacing ``): We use the example container `benblamey/hio-example:latest`, which can be built from https://github.com/HASTE-project/HarmonicPE From 6d4ce4fddaf56f5871594cd321290a2eb6424e66 Mon Sep 17 00:00:00 2001 From: Oliver Stein Date: Thu, 2 Jul 2020 21:35:37 +0200 Subject: [PATCH 64/64] Create IRM_default_parameters.md --- IRM_default_parameters.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 IRM_default_parameters.md diff --git a/IRM_default_parameters.md b/IRM_default_parameters.md new file mode 100644 index 0000000..521ad51 --- /dev/null +++ b/IRM_default_parameters.md @@ -0,0 +1,18 @@ +## IRM configuration parameters + +This table shows a list of the parameters available to tune the IRM components in HIO, including a description of each parameter and the default value. + +| Parameter name | Explanation | Default value | +|:-:|:-------------|------:| +| `packing_interval` | Interval in seconds between performing the bin packing algorithm | 1 | +| `default_cpu_share` | Initial guess of CPU size of unencountered container images | 0.125 | +| `profiling_interval` | Interval in seconds between how often worker profiler updates queued container requests | 4 | +| `predictor_interval` | Interval in seconds between predicting load and determining scaling action | 1 | +| `lower_rate_limit` | Lower positive threshold for load predictor | 2 | +| `upper_rate_limit` | Upper positive threshold for load predictor | 5 | +| `slowdown_rate` | Negative threshold for load predictor | -2 | +| `queue_size_limit` | Message queue length limit for load predictor | 10 | +| `scaleup_waiting_time` | Cool-down time for load predictor scaleup actions | 10 | +| `large_scaleup_amount` | Large scaleup quantity for load predictor | 2 | +| `small_scaleup_amount` | Small scaleup quantity for load predictor | 1 | +| `container_request_TTL` | Initial time-to-live counter for container requests | 1 |