Commit 5c71c70

skara9 and bmcutler authored
[Feature:System] Revamp worker setup and fix QEMU (#10530)
### Please check if the PR fulfills these requirements:
* [ ] Tests for the changes have been added/updated (if possible)
* [ ] Documentation has been updated/added if relevant
* [ ] Screenshots are attached to Github PR if visual/UI changes were made

### What is the current behavior?
Worker configuration currently fails on QEMU due to lack of support for private networking.

### What is the new behavior?
The QEMU plugin has been modified to add support for socket networking. The worker management configuration has been revamped to better accommodate the networking needs of the new QEMU plugin setup.

Testing instructions: see Submitty/submitty.github.io#621

Co-authored-by: Barb Cutler <[email protected]>
1 parent 36a09bf commit 5c71c70
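
Note on the new configuration format: the refresh script and Vagrantfile changes below read a versioned `.vagrant/workers.json` with top-level `version`, `provider`, and `workers` keys, and per-worker `ip_addr`, `mac_addr`, and (optional) `ssh_port` fields. A minimal sketch of what a v2 file might look like; the worker name and values are placeholders, and the authoritative shape is whatever `vagrant workers generate` emits:

    {
        "version": 2,
        "provider": "qemu",
        "workers": {
            "worker-1": {
                "ip_addr": "192.168.56.101",
                "ssh_port": 2240,
                "mac_addr": "52:54:00:00:00:01"
            }
        }
    }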

8 files changed: +424 −119 lines

.pylintrc

Lines changed: 4 additions & 1 deletion
@@ -89,6 +89,7 @@ ignore-paths=
     .setup/bin/create_untrusted_users.py,
     .setup/bin/partial_reset.py,
     .setup/bin/update_repo_version.py,
+    .setup/bin/refresh_vagrant_workers.py,
     .setup/bin/reset_system.py,
     .setup/bin/code_watcher.py,
     .setup/bin/setup_sample_emails.py,
@@ -134,7 +135,9 @@ ignore-paths=
     migration/tests/test_handle_migration.py,
     migration/tests/test_dumper.py,
     sample_files/sample_CSV/test_grades.py,
-    sample_files/sample_CSV/verify.py
+    sample_files/sample_CSV/verify.py,
+    vagrant-workers/workers.py,
+    vagrant-workers/generate_workers.py

 [FORMAT]
 max-line-length=100

.setup/CONFIGURE_SUBMITTY.py

Lines changed: 0 additions & 14 deletions
@@ -554,20 +554,6 @@ def write(x=''):
         }
     }

-    vagrant_workers_json = os.path.join(SUBMITTY_REPOSITORY, '.vagrant', 'workers.json')
-    if os.path.isfile(vagrant_workers_json):
-        with open(vagrant_workers_json) as f:
-            vagrant_workers = json.load(f, object_hook=OrderedDict)
-
-        for worker, data in vagrant_workers.items():
-            worker_dict[worker] = {
-                "capabilities": capabilities,
-                "address": data["ip_addr"],
-                "username": "submitty",
-                "num_autograding_workers": NUM_GRADING_SCHEDULER_WORKERS,
-                "enabled": True
-            }
-
     with open(WORKERS_JSON, 'w') as workers_file:
         json.dump(worker_dict, workers_file, indent=4)

.setup/bin/refresh_vagrant_workers.py

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
+import os
+import shutil
+import pwd
+import json
+import subprocess
+from collections import OrderedDict
+
+SUBMITTY_REPOSITORY = os.environ['SUBMITTY_REPOSITORY']
+SUBMITTY_INSTALL_DIR = os.environ['SUBMITTY_INSTALL_DIR']
+DAEMON_USER = os.environ['DAEMON_USER']
+SUPERVISOR_USER = 'submitty'
+
+vagrant_workers_path = os.path.join(SUBMITTY_REPOSITORY, '.vagrant', 'workers.json')
+autograding_workers_path = os.path.join(SUBMITTY_INSTALL_DIR, 'config', 'autograding_workers.json')
+
+print("Loading existing data...")
+with open(vagrant_workers_path) as file:
+    vagrant_workers_data = json.load(file, object_pairs_hook=OrderedDict)
+
+with open(autograding_workers_path) as file:
+    autograding_workers_data = json.load(file, object_pairs_hook=OrderedDict)
+
+if 'version' in vagrant_workers_data and type(vagrant_workers_data['version']) is int:
+    provider = vagrant_workers_data['provider']
+    vagrant_workers_data = vagrant_workers_data['workers']
+else:
+    print("This script requires a worker configuration of v2 or greater. Please regenerate your configuration with 'vagrant workers generate'.")
+    exit(1)
+print("Done loading data")
+print()
+
+print("Generating SSH credentials...")
+shutil.rmtree("/tmp/worker_keys", True)
+os.makedirs("/tmp/worker_keys", 0o500)
+daemon_stat = pwd.getpwnam(DAEMON_USER)
+os.chown("/tmp/worker_keys", daemon_stat.pw_uid, daemon_stat.pw_gid)
+
+DAEMON_HOME = os.path.realpath(subprocess.check_output(['su', DAEMON_USER, '-c', 'echo $HOME']).strip())
+if not os.path.exists(DAEMON_HOME):
+    print("Error: could not find home directory for daemon user")
+    exit(1)
+
+shutil.rmtree(os.path.join(DAEMON_HOME, b'.ssh'), True)
+subprocess.run(['su', DAEMON_USER, '-c', "ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N ''"], check=True)
+print("Done generating")
+print()
+
+ssh_config = ''
+successful_machines = []
+for name, data in vagrant_workers_data.items():
+    print("Attempting to connect to " + name)
+    shutil.copyfile(f"{SUBMITTY_REPOSITORY}/.vagrant/machines/{name}/{provider}/private_key", f"/tmp/worker_keys/{name}")
+    os.chown(f"/tmp/worker_keys/{name}", daemon_stat.pw_uid, daemon_stat.pw_gid)
+    os.chmod(f"/tmp/worker_keys/{name}", 0o400)
+    w = subprocess.run(['su', DAEMON_USER, '-c', f"scp -i /tmp/worker_keys/{name} -o StrictHostKeyChecking=no ~/.ssh/id_rsa.pub root@{data['ip_addr']}:/tmp/workerkey"])
+    w = subprocess.run(['su', DAEMON_USER, '-c', f"ssh -i /tmp/worker_keys/{name} -o StrictHostKeyChecking=no root@{data['ip_addr']} \"chown {SUPERVISOR_USER}:{SUPERVISOR_USER} /tmp/workerkey && su {SUPERVISOR_USER} -c \\\"mkdir -p ~/.ssh && mv /tmp/workerkey ~/.ssh/authorized_keys\\\"\""])
+    if w.returncode == 0:
+        print("Connected to " + name)
+        successful_machines.append(name)
+        ssh_config += f"Host {name}\n HostName {data['ip_addr']}\n IdentityFile ~/.ssh/id_rsa\n User submitty\n"
+    else:
+        print("Failed to connect to " + name)
+
+shutil.rmtree("/tmp/worker_keys", True)
+print()
+
+print("Updating SSH configuration...")
+ssh_config_path = os.path.join(DAEMON_HOME, b'.ssh', b'config')
+with open(ssh_config_path, 'w') as file:
+    file.write(ssh_config)
+os.chown(ssh_config_path, daemon_stat.pw_uid, daemon_stat.pw_gid)
+print("Successfully updated")
+
+print("Writing new autograding configuration...")
+new_autograding_data = OrderedDict()
+new_autograding_data['primary'] = autograding_workers_data['primary']
+
+total = 0
+enabled = 0
+for name, data in vagrant_workers_data.items():
+    worker_data = OrderedDict(autograding_workers_data['primary'])
+    worker_data['username'] = SUPERVISOR_USER
+    worker_data['address'] = data['ip_addr']
+    if name not in successful_machines:
+        worker_data['enabled'] = False
+    else:
+        enabled += 1
+    total += 1
+    new_autograding_data[name] = worker_data
+
+with open(autograding_workers_path, 'w') as file:
+    json.dump(new_autograding_data, file, indent=4)
+print(f"Configuration saved with {enabled}/{total} machines enabled")
+print()
+
+print("DONE")

.setup/install_system.sh

Lines changed: 1 addition & 12 deletions
@@ -182,6 +182,7 @@ alias migrator='python3 ${SUBMITTY_REPOSITORY}/migration/run_migrator.py -c ${SU
 alias vagrant_info='cat /etc/motd'
 alias ntp_sync='service ntp stop && ntpd -gq && service ntp start'
 alias recreate_sample_courses='sudo bash /usr/local/submitty/GIT_CHECKOUT/Submitty/.setup/bin/recreate_sample_courses.sh'
+alias refresh_vagrant_workers='python3 /usr/local/submitty/GIT_CHECKOUT/Submitty/.setup/bin/refresh_vagrant_workers.py'
 systemctl start submitty_autograding_shipper
 systemctl start submitty_autograding_worker
 systemctl start submitty_daemon_jobs_handler
@@ -329,18 +330,6 @@ fi

 if ! cut -d ':' -f 1 /etc/passwd | grep -q ${DAEMON_USER} ; then
     useradd -m -c "First Last,RoomNumber,WorkPhone,HomePhone" "${DAEMON_USER}" -s /bin/bash
-    if [ ${WORKER} == 0 ] && [ ${DEV_VM} == 1 ] && [ -f ${SUBMITTY_REPOSITORY}/.vagrant/workers.json ]; then
-        echo -e "attempting to create ssh key for submitty_daemon..."
-        su submitty_daemon -c "cd ~/"
-        su submitty_daemon -c "ssh-keygen -b 2048 -t rsa -f ~/.ssh/id_rsa -q -N ''"
-        su submitty_daemon -c "echo 'successfully created ssh key'"
-
-        while read -r IP
-        do
-            su submitty_daemon -c "sshpass -p 'submitty' ssh-copy-id -i ~/.ssh/id_rsa.pub -o StrictHostKeyChecking=no submitty@${IP}"
-        done <<< "$(jq -r ".[].ip_addr" "${SUBMITTY_REPOSITORY}/.vagrant/workers.json")"
-        echo "DONE"
-    fi
 fi

 # The VCS directories (/var/local/submitty/vcs) are owned by root:$DAEMONCGI_GROUP

Vagrantfile

Lines changed: 39 additions & 22 deletions
@@ -79,29 +79,28 @@ base_boxes[:arm_bento] = "bento/ubuntu-22.04-arm64"
 base_boxes[:libvirt] = "generic/ubuntu2204"
 base_boxes[:arm_mac_qemu] = "perk/ubuntu-2204-arm64"

-
-def mount_folders(config, mount_options, type = nil)
+def mount_folders(config, mount_options, type = nil, host = '10.0.2.2')
   # ideally we would use submitty_daemon or something as the owner/group, but since that user doesn't exist
   # till post-provision (and this is mounted before provisioning), we want the group to be 'vagrant'
   # which is guaranteed to exist and that during install_system.sh we add submitty_daemon/submitty_php/etc to the
   # vagrant group so that they can write to this shared folder, primarily just for the log files
   owner = 'root'
   group = 'vagrant'
-  config.vm.synced_folder '.', '/usr/local/submitty/GIT_CHECKOUT/Submitty', create: true, owner: owner, group: group, mount_options: mount_options, smb_host: '10.0.2.2', smb_username: `whoami`.chomp, type: type
+  config.vm.synced_folder '.', '/usr/local/submitty/GIT_CHECKOUT/Submitty', create: true, owner: owner, group: group, mount_options: mount_options, smb_host: host, smb_username: `whoami`.chomp, type: type

   optional_repos = %w(AnalysisTools AnalysisToolsTS Lichen RainbowGrades Tutorial CrashCourseCPPSyntax IntroQuantumComputing LichenTestData DockerImages DockerImagesRPI)
   optional_repos.each {|repo|
     repo_path = File.expand_path("../" + repo)
     if File.directory?(repo_path)
-      config.vm.synced_folder repo_path, "/usr/local/submitty/GIT_CHECKOUT/" + repo, owner: owner, group: group, mount_options: mount_options, smb_host: '10.0.2.2', smb_username: `whoami`.chomp, type:type
+      config.vm.synced_folder repo_path, "/usr/local/submitty/GIT_CHECKOUT/" + repo, owner: owner, group: group, mount_options: mount_options, smb_host: host, smb_username: `whoami`.chomp, type: type
     end
   }
 end

 def get_workers()
   worker_file = File.join(__dir__, '.vagrant', 'workers.json')
   if File.file?(worker_file)
-    return JSON.parse(File.read(worker_file), symbolize_names: true)
+    return JSON.parse(File.read(worker_file), symbolize_names: true)[:workers]
   else
     return Hash[]
   end
@@ -144,22 +143,44 @@ Vagrant.configure(2) do |config|
   # that one) as well as making sure all non-primary ones have "autostart: false" set
   # so that when we do "vagrant up", it doesn't spin up those machines.

-  get_workers.map do |worker_name, data|
-    config.vm.define worker_name do |ubuntu|
-      ubuntu.vm.network 'private_network', ip: data[:ip_addr]
-      ubuntu.vm.network 'forwarded_port', guest: 22, host: data[:ssh_port], id: 'ssh'
-      ubuntu.vm.provision 'shell', inline: gen_script(worker_name, worker: true, base: base_box)
+  if ARGV[0] == 'workers'
+    if apple_silicon
+      exec("arch", "-arm64", "python3", "vagrant-workers/workers.py", *ARGV[1..])
     end
+    exec("python3", "vagrant-workers/workers.py", *ARGV[1..])
   end

-  vm_name = 'ubuntu-22.04'
-  config.vm.define vm_name, primary: true do |ubuntu|
-    ubuntu.vm.network 'forwarded_port', guest: 1511, host: ENV.fetch('VM_PORT_SITE', 1511)
-    ubuntu.vm.network 'forwarded_port', guest: 8443, host: ENV.fetch('VM_PORT_WS', 8443)
-    ubuntu.vm.network 'forwarded_port', guest: 5432, host: ENV.fetch('VM_PORT_DB', 16442)
-    ubuntu.vm.network 'forwarded_port', guest: 7000, host: ENV.fetch('VM_PORT_SAML', 7001)
-    ubuntu.vm.network 'forwarded_port', guest: 22, host: ENV.fetch('VM_PORT_SSH', 2222), id: 'ssh'
-    ubuntu.vm.provision 'shell', inline: gen_script(vm_name, base: base_box)
+  if ENV.fetch('WORKER_MODE', '0') == '1'
+    get_workers.map do |worker_name, data|
+      config.vm.define worker_name do |ubuntu|
+        ubuntu.vm.network 'private_network', ip: data[:ip_addr]
+        ubuntu.vm.network 'forwarded_port', guest: 22, host: data[:ssh_port], id: 'ssh' unless data[:ssh_port].nil?
+        ubuntu.vm.provision 'shell', inline: gen_script(worker_name, worker: true, base: true)
+
+        ubuntu.vm.provider "qemu" do |qe, override|
+          qe.ssh_host = data[:ip_addr]
+          qe.ssh_port = 22
+          qe.socket_fd = 3
+          qe.mac_address = data[:mac_addr]
+          mount_folders(override, [], 'smb', ENV.fetch('GATEWAY_IP', '10.0.2.2'))
+        end
+      end
+    end
+  else
+    vm_name = 'ubuntu-22.04'
+    config.vm.define vm_name, primary: true do |ubuntu|
+      ubuntu.vm.network 'forwarded_port', guest: 1511, host: ENV.fetch('VM_PORT_SITE', 1511)
+      ubuntu.vm.network 'forwarded_port', guest: 8443, host: ENV.fetch('VM_PORT_WS', 8443)
+      ubuntu.vm.network 'forwarded_port', guest: 5432, host: ENV.fetch('VM_PORT_DB', 16442)
+      ubuntu.vm.network 'forwarded_port', guest: 7000, host: ENV.fetch('VM_PORT_SAML', 7001)
+      ubuntu.vm.network 'forwarded_port', guest: 22, host: ENV.fetch('VM_PORT_SSH', 2222), id: 'ssh'
+      ubuntu.vm.provision 'shell', inline: gen_script(vm_name, base: base_box)
+
+      ubuntu.vm.provider "qemu" do |qe, override|
+        qe.ssh_port = ENV.fetch('VM_PORT_SSH', 2222)
+        mount_folders(override, [], 'smb')
+      end
+    end
   end
@@ -264,10 +285,6 @@ Vagrant.configure(2) do |config|

     qe.memory = "2G"
     qe.smp = 2
-
-    qe.ssh_port = ENV.fetch('VM_PORT_SSH', 2222)
-
-    mount_folders(override, [])
   end

   config.vm.provision :shell, :inline => " sudo timedatectl set-timezone America/New_York", run: "once"
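
Taken together, the `ENV.fetch` calls above suggest worker VMs are brought up roughly as follows. This invocation is illustrative, not documented in this diff: the worker name is a placeholder, and `GATEWAY_IP` only overrides the SMB host used for the QEMU synced-folder mount:

    WORKER_MODE=1 GATEWAY_IP=10.0.2.2 vagrant up worker-1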

generate_workers.py

Lines changed: 0 additions & 70 deletions
This file was deleted.
