Skip to content

Commit 860dbf2

Browse files
Merge pull request #36 from evaniacheng/main
Added Alexa Task Implementation
2 parents 353c0c1 + f0e61b4 commit 860dbf2

File tree

2 files changed

+224
-0
lines changed

2 files changed

+224
-0
lines changed

tasks/measurements/alexa/alexa.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
from typing import Dict, Union
2+
from netunicorn.base import Task, Failure
3+
import subprocess
4+
import pprint
5+
from ping3 import ping
6+
import csv
7+
import json
8+
9+
class AlexaWebsitesTask(Task):
    """Measure network metrics (traceroute, ping RTT, DNS query time, and
    HTTP connection timing) for a single domain or for a list of Alexa top
    websites loaded from a CSV file.

    In single-domain mode (``domain`` given) ``run()`` returns one metrics
    dict; in batch mode it returns a mapping of domain -> metrics dict and
    optionally writes the results to ``output_path`` as JSON.
    """

    # System tools (curl, dig, traceroute) and the ping3 package needed on
    # the worker node.
    requirements = [
        "sudo apt-get install -y curl dnsutils traceroute",
        "pip install ping3",
    ]

    def __init__(self, domain: str = None, filepath: str = "alexa_websites.csv", output_path: str = None, top_k: int = 100, *args, **kwargs):
        """
        :param domain: single domain to measure; if None, measure the top
            ``top_k`` domains read from ``filepath`` instead.
        :param filepath: CSV file with rows of the form ``rank,domain``.
        :param output_path: optional path of a JSON file to write batch
            results to; if None, batch results are pretty-printed instead.
        :param top_k: number of websites to take from the CSV file.
        """
        super().__init__(*args, **kwargs)
        self.domain = domain
        self.filepath = filepath
        self.output_path = output_path
        self.top_k = top_k

    def get_traceroute(self) -> Union[str, Failure]:
        """Run ``traceroute`` (max 10 hops) against ``self.domain``.

        :return: raw traceroute stdout, or Failure on any error.
        """
        try:
            result = subprocess.run(
                ["traceroute", "-m", "10", self.domain],
                capture_output=True, text=True, check=True,
            )
            return result.stdout
        except Exception as e:
            return Failure(f"Traceroute failed: {e}")

    def measure_ping(self) -> Union[Dict[str, float], Failure]:
        """Measure ICMP round-trip time to ``self.domain``.

        :return: ``{"value": <rtt>, "unit": "ms"}`` or Failure.
        """
        try:
            ping_value = ping(self.domain)
            # ping3 returns None on timeout and False on errors such as an
            # unknown host. The original code only checked for None, so a
            # False result became False * 1000 == 0 — a bogus 0 ms reading.
            if ping_value is None or ping_value is False:
                return Failure("Ping returned no result (timeout or error).")
            # ping3 reports seconds; convert to milliseconds.
            return {"value": ping_value * 1000, "unit": "ms"}
        except Exception as e:
            return Failure(f"Ping failed: {e}")

    def measure_dns_time(self) -> Union[Dict[str, float], Failure]:
        """Measure DNS resolution time for ``self.domain`` via ``dig``.

        Parses the ";; Query time: N msec" line from dig's output.

        :return: ``{"value": <msec>, "unit": "ms"}`` or Failure.
        """
        try:
            result = subprocess.run(
                ["dig", self.domain],
                capture_output=True, text=True, check=True,
            )
            for line in result.stdout.splitlines():
                if "Query time" in line:
                    # Line looks like ";; Query time: 23 msec".
                    return {
                        "value": float(line.split(":")[1].strip().split(" ")[0]),
                        "unit": "ms",
                    }
            return Failure("Query time not found in DNS response.")
        except Exception as e:
            return Failure(f"DNS resolution failed: {e}")

    def measure_timing(self) -> Union[Dict[str, Dict[str, float]], Failure]:
        """Measure HTTP connection phase timings for ``https://self.domain``.

        Uses curl's ``--write-out`` timing variables (namelookup, connect,
        appconnect, pretransfer, redirect, starttransfer, total).

        :return: mapping of timing name -> ``{"value": <ms>, "unit": "ms"}``,
            or Failure.
        """
        try:
            result = subprocess.run([
                "curl",
                "-o", "/dev/null",   # discard the body; we only want timings
                "-s",
                "-w",
                (
                    "time_appconnect: %{time_appconnect}\n"
                    "time_connect: %{time_connect}\n"
                    "time_namelookup: %{time_namelookup}\n"
                    "time_pretransfer: %{time_pretransfer}\n"
                    "time_redirect: %{time_redirect}\n"
                    "time_starttransfer: %{time_starttransfer}\n"
                    "time_total: %{time_total}\n"
                ),
                "-H", "Cache-Control: no-cache",
                f"https://{self.domain}",
            ], capture_output=True, text=True, check=True)
            # curl reports seconds; convert each value to milliseconds.
            # Guard on ": " so an unexpected line cannot blow up the
            # two-way unpack with ValueError (original had no guard).
            metrics = {
                key.strip(): {"value": float(value.strip()) * 1000, "unit": "ms"}
                for line in result.stdout.splitlines()
                if ": " in line
                for key, value in [line.split(": ", 1)]
            }
            return metrics
        except Exception as e:
            return Failure(f"Network Timing measurement failed: {e}")

    @staticmethod
    def load_websites(filepath: str, top_k: int) -> list:
        """Load up to ``top_k`` domain names from a ``rank,domain`` CSV file.

        :param filepath: path of the CSV file.
        :param top_k: maximum number of domains to return.
        :return: list of domain strings (second CSV column).
        """
        websites = []
        with open(filepath, 'r') as file:
            for row in csv.reader(file):
                if len(websites) >= top_k:
                    break
                # Skip blank or malformed rows instead of raising
                # IndexError on row[1] (original did not guard this).
                if len(row) > 1:
                    websites.append(row[1])
        return websites

    def _measure_domain(self) -> Dict[str, Dict]:
        """Collect all metrics for the current ``self.domain``."""
        return {
            "traceroute": self.get_traceroute(),
            "ping_time": self.measure_ping(),
            "dns_time": self.measure_dns_time(),
            "measure_timing": self.measure_timing(),
        }

    def run(self) -> Union[Dict[str, Dict], Failure]:
        """Run the measurements.

        Single-domain mode (``self.domain`` set): return one metrics dict.
        Batch mode: measure every website loaded from ``self.filepath`` and
        return a dict of domain -> metrics; write it to ``self.output_path``
        as JSON if given, otherwise pretty-print it.
        """
        if self.domain:
            # Run for a single domain
            return self._measure_domain()

        # Run for all websites in a file
        websites = self.load_websites(self.filepath, self.top_k)
        print(f"Loaded {len(websites)} websites.")

        results = {}
        original_domain = self.domain
        try:
            for website in websites:
                print(f"Processing: {website}")
                try:
                    self.domain = website
                    results[website] = self._measure_domain()
                except Exception as e:
                    results[website] = Failure(f"Failed to process {website}: {e}")
        finally:
            # The original code left self.domain set to the last website,
            # so a second run() call silently became a single-domain run.
            self.domain = original_domain

        # Save results to a JSON file if output_path is provided
        if self.output_path:
            print(f"Saving results to {self.output_path}")
            try:
                with open(self.output_path, "w") as f:
                    # Failure objects are not JSON-serializable; default=str
                    # keeps the dump from raising TypeError whenever any
                    # probe failed (original crashed here in that case).
                    json.dump(results, f, indent=4, default=str)
            except Exception as e:
                return Failure(f"Failed to write results to file: {e}")
        else:
            pprint.pp(results)

        return results
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
1,google.com
2+
2,facebook.com
3+
3,youtube.com
4+
4,yahoo.com
5+
5,baidu.com
6+
6,wikipedia.org
7+
7,qq.com
8+
8,taobao.com
9+
9,twitter.com
10+
10,amazon.com
11+
11,linkedin.com
12+
12,live.com
13+
13,google.co.in
14+
14,sina.com.cn
15+
15,hao123.com
16+
16,blogspot.com
17+
17,weibo.com
18+
18,tmall.com
19+
19,vk.com
20+
20,wordpress.com
21+
21,yahoo.co.jp
22+
22,sohu.com
23+
23,yandex.ru
24+
24,ebay.com
25+
25,google.de
26+
26,bing.com
27+
27,pinterest.com
28+
28,google.co.uk
29+
29,163.com
30+
30,360.cn
31+
31,google.fr
32+
32,ask.com
33+
33,instagram.com
34+
34,google.co.jp
35+
35,tumblr.com
36+
36,msn.com
37+
37,google.com.br
38+
38,mail.ru
39+
39,microsoft.com
40+
40,xvideos.com
41+
41,paypal.com
42+
42,google.ru
43+
43,soso.com
44+
44,adcash.com
45+
45,google.es
46+
46,google.it
47+
47,imdb.com
48+
48,apple.com
49+
49,imgur.com
50+
50,neobux.com
51+
51,craigslist.org
52+
52,amazon.co.jp
53+
53,t.co
54+
54,xhamster.com
55+
55,stackoverflow.com
56+
56,reddit.com
57+
57,google.com.mx
58+
58,google.com.hk
59+
59,cnn.com
60+
60,google.ca
61+
61,fc2.com
62+
62,go.com
63+
63,ifeng.com
64+
64,bbc.co.uk
65+
65,vube.com
66+
66,people.com.cn
67+
67,blogger.com
68+
68,aliexpress.com
69+
69,odnoklassniki.ru
70+
70,wordpress.org
71+
71,alibaba.com
72+
72,gmw.cn
73+
73,adobe.com
74+
74,huffingtonpost.com
75+
75,google.com.tr
76+
76,xinhuanet.com
77+
77,googleusercontent.com
78+
78,youku.com
79+
79,godaddy.com
80+
80,pornhub.com
81+
81,akamaihd.net
82+
82,thepiratebay.se
83+
83,kickass.to
84+
84,google.com.au
85+
85,amazon.de
86+
86,clkmon.com
87+
87,ebay.de
88+
88,alipay.com
89+
89,google.pl
90+
90,espn.go.com
91+
91,dailymotion.com
92+
92,about.com
93+
93,bp.blogspot.com
94+
94,blogspot.in
95+
95,netflix.com
96+
96,vimeo.com
97+
97,dailymail.co.uk
98+
98,redtube.com
99+
99,rakuten.co.jp
100+
100,conduit.com

0 commit comments

Comments
 (0)