Skip to content

Commit 8aa4c9e

Browse files
authored
Merge pull request #134 from IBM/automlflow_anomaly_detection
Automlflow anomaly detection
2 parents 3011477 + 1c51176 commit 8aa4c9e

File tree

3 files changed

+206
-15
lines changed

3 files changed

+206
-15
lines changed

AutoAI/src/AutoAI.jl

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,24 @@ include("carettspredictor.jl")
3232
using .CaretTSPredictors
3333
export CaretTSPredictor, carettsdriver
3434

35+
include("autoanomalydetection.jl")
36+
using .AutoAnomalyDetections
37+
export AutoAnomalyDetection
38+
export autoaddriver
39+
3540
include("automlflowclassification.jl")
3641
using .AutoMLFlowClassifications
42+
export AutoMLFlowClassification
3743
export mlfcldriver
3844

3945
include("automlflowregression.jl")
4046
using .AutoMLFlowRegressions
47+
export AutoMLFlowRegression
4148
export mlfregdriver
4249

43-
include("autoanomalydetector.jl")
44-
using .AutoAnomalyDetectors
45-
export autoaddriver
50+
include("automlflowanomalydetection.jl")
51+
using .AutoMLFlowAnomalyDetections
52+
export AutoMLFlowAnomalyDetection
53+
export mlfaddriver
4654

4755
end # module AutoAI

AutoAI/src/autoanomalydetector.jl renamed to AutoAI/src/autoanomalydetection.jl

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
module AutoAnomalyDetectors
1+
module AutoAnomalyDetections
22
using Distributed
33
using AutoMLPipeline
44
using DataFrames: DataFrame, nrow, rename!
@@ -13,14 +13,14 @@ using ..SKAnomalyDetectors
1313

1414
import ..AbsTypes: fit, fit!, transform, transform!
1515
export fit, fit!, transform, transform!
16-
export AutoAnomalyDetector, autoaddriver
16+
export AutoAnomalyDetection, autoaddriver
1717

1818
# define customized type
19-
mutable struct AutoAnomalyDetector <: Workflow
19+
mutable struct AutoAnomalyDetection <: Workflow
2020
name::String
2121
model::Dict{Symbol,Any}
2222

23-
function AutoAnomalyDetector(args=Dict())
23+
function AutoAnomalyDetection(args=Dict())
2424
default_args = Dict(
2525
:name => "autoad",
2626
:votepercent => 0.0, # output all votepercent if 0.0, otherwise get specific votepercent
@@ -32,15 +32,15 @@ mutable struct AutoAnomalyDetector <: Workflow
3232
end
3333
end
3434

35-
function fit!(autodt::AutoAnomalyDetector, X::DataFrame, Y::Vector)
35+
function fit!(autodt::AutoAnomalyDetection, X::DataFrame, Y::Vector)
3636
return nothing
3737
end
3838

39-
function fit(clfb::AutoAnomalyDetector, X::DataFrame, Y::Vector)
39+
function fit(clfb::AutoAnomalyDetection, X::DataFrame, Y::Vector)
4040
return nothing
4141
end
4242

43-
function transform!(autodt::AutoAnomalyDetector, X::DataFrame)
43+
function transform!(autodt::AutoAnomalyDetection, X::DataFrame)
4444
# detect anomalies using caret
4545
dfres1 = DataFrame()
4646
for learner in keys(caretadlearner_dict)
@@ -66,28 +66,35 @@ function transform!(autodt::AutoAnomalyDetector, X::DataFrame)
6666
mdfm = hcat(mdf, DataFrame(admean=mean.(eachrow(mdf))))
6767
# filter anomalies based on mean cut-off
6868
votepercent = autodt.model[:votepercent]
69+
# if votepercent is 0.0 return all 0.1 to 1.0 votepercent output
70+
# otherwise, return specific output for given votepercent
6971
if votepercent == 0.0
7072
dfad = @distributed (hcat) for cutoff in 0.1:0.1:1.0
7173
ndx = map(x -> x >= cutoff, mdfm.admean)
7274
n = string(cutoff)
7375
DataFrame(n => ndx)
7476
end
75-
return dfad
77+
return hcat(X, dfad)
7678
else
7779
ndx = map(x -> x >= votepercent, mdfm.admean)
7880
n = string(votepercent)
7981
dfad = DataFrame(n => ndx)
80-
return dfad
82+
return hcat(X, dfad)
8183
end
8284
end
8385

84-
function transform(autodt::AutoAnomalyDetector, X::DataFrame)
86+
function transform(autodt::AutoAnomalyDetection, X::DataFrame)
87+
autodtc = deepcopy(autodt)
88+
return transform!(autodtc, X)
8589
end
8690

8791
function autoaddriver()
88-
autoaddt = AutoAnomalyDetector(Dict(:votepercent => 0.0))
92+
autoaddt = AutoAnomalyDetection(Dict(:votepercent => 0.0))
8993
X = vcat(5 * cos.(-10:10), sin.(-30:30), 3 * cos.(-10:10), 2 * tan.(-10:10), sin.(-30:30)) |> x -> DataFrame([x], :auto)
90-
fit_transform!(autoaddt, X)
94+
@info fit_transform!(autoaddt, X) |> x -> first(x, 5)
95+
autoaddt1 = AutoAnomalyDetection(Dict(:votepercent => 0.3))
96+
@info fit_transform!(autoaddt1, X) |> x -> first(x, 5)
97+
9198
end
9299

93100

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
module AutoMLFlowAnomalyDetections
2+
using Statistics
3+
using Serialization
4+
import PythonCall
5+
const PYC = PythonCall
6+
7+
using DataFrames: DataFrame
8+
using Random
9+
using ..AbsTypes
10+
using ..Utils
11+
using ..AutoAnomalyDetections
12+
using ..AutoMLPipeline: getiris
13+
14+
import ..AbsTypes: fit, fit!, transform, transform!
15+
export fit, fit!, transform, transform!
16+
export mlfaddriver, AutoMLFlowAnomalyDetection
17+
18+
const MLF = PYC.pynew()
19+
const REQ = PYC.pynew()
20+
21+
# Module initializer: import the Python `mlflow` and `requests` modules and copy
# them into the `MLF` and `REQ` handles, which are created empty with `pynew()`
# at module level.
function __init__()
    mlflow_module = PYC.pyimport("mlflow")
    PYC.pycopy!(MLF, mlflow_module)
    requests_module = PYC.pyimport("requests")
    return PYC.pycopy!(REQ, requests_module)
end
25+
26+
"""
    AutoMLFlowAnomalyDetection(args=Dict())

Workflow that runs automated anomaly detection and records it in an MLflow
tracking server.

`args` is merged over the defaults with `nested_dict_merge`. Recognized keys:
`:name` (also used as the MLflow experiment name), `:projectname`, `:url`
(tracking server), `:description`, `:projecttype`, `:artifact_name`,
`:votepercent` and `:impl_args`.

On construction the server's `/health` endpoint is requested, the tracking URI is
set, and the experiment named `:name` is looked up; it is created with the
project tags when the search returns nothing. The experiment id is stored under
`:experiment_id` in both cases.

# Throws
- `ErrorException` when the health request raises, i.e. the server cannot be
  reached. An exception is raised instead of terminating the whole Julia
  process so that interactive sessions and callers can recover.
"""
mutable struct AutoMLFlowAnomalyDetection <: Workflow
    name::String
    model::Dict{Symbol,Any}

    function AutoMLFlowAnomalyDetection(args=Dict())
        default_args = Dict(
            :name => "AutoAnomalDetection",
            :projectname => "AutoAnomalDetection",
            :url => "http://localhost:8080",
            :description => "Automated Anomaly Detection",
            :projecttype => "anomalydetection",
            :artifact_name => "autoad.bin",
            :votepercent => 0.0,
            :impl_args => Dict()
        )
        cargs = nested_dict_merge(default_args, args)
        experiment_tags = Dict(
            "projectname" => cargs[:projectname],
            "projecttype" => cargs[:projecttype],
            "notes" => cargs[:description]
        )
        # check if mlflow server exists; only a raised exception counts as
        # unreachable, the response itself is not inspected
        try
            httpget = getproperty(REQ, "get")
            httpget(cargs[:url] * "/health")
        catch
            @error("Mlflow Server Unreachable")
            error("Mlflow Server Unreachable: " * string(cargs[:url]))
        end
        MLF.set_tracking_uri(uri=cargs[:url])
        name = cargs[:name]
        experiment = MLF.search_experiments(filter_string="name = \'$name\'")
        if PYC.pylen(experiment) != 0
            # reuse the existing experiment and remember its id as well
            MLF.set_experiment(experiment[0].name)
            cargs[:experiment_id] = experiment[0].experiment_id
        else
            cargs[:experiment_id] = MLF.create_experiment(name=name, tags=experiment_tags)
        end
        new(cargs[:name], cargs)
    end
end
68+
69+
"""
    AutoMLFlowAnomalyDetection(name::String, args::Dict)

Convenience constructor: build the workflow from `args` with `:name` set to
`name`. Because the entries of `args` are splatted after the `:name` pair, a
`:name` key inside `args` takes precedence over the positional `name`.
"""
function AutoMLFlowAnomalyDetection(name::String, args::Dict)
    named_args = Dict(:name => name, args...)
    return AutoMLFlowAnomalyDetection(named_args)
end
72+
73+
"""
    AutoMLFlowAnomalyDetection(name::String; args...)

Convenience constructor taking the settings as keyword arguments.

`name` is stored under `:name` (it was previously dropped, so the default name
was always used). As in the `(name, args::Dict)` method, an explicit `name=...`
keyword overrides the positional `name`.
"""
function AutoMLFlowAnomalyDetection(name::String; args...)
    return AutoMLFlowAnomalyDetection(Dict{Symbol,Any}(:name => name, pairs(args)...))
end
76+
77+
"""
    (obj::AutoMLFlowAnomalyDetection)(; args...)

Update the workflow settings in place: merge the keyword arguments over
`obj.model` with `nested_dict_merge`, store the result back in `obj.model`, and
return `obj`.
"""
function (obj::AutoMLFlowAnomalyDetection)(; args...)
    overrides = Dict(pairs(args))
    obj.model = nested_dict_merge(obj.model, overrides)
    return obj
end
83+
84+
"""
    fit!(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame, Y::Vector)

Run automated anomaly detection on `X` inside a new MLflow run and record it.

A run named `<name>_fit_<3 random chars>` is started in the experiment
`mlfad.model[:name]`. An `AutoAnomalyDetection` workflow configured with
`mlfad.model[:votepercent]` is applied with `fit_transform!`; its output is
logged as the `ADOutput` param and the vote percent as a metric. The workflow is
serialized to `joinpath(tempdir(), artifact_name)` and logged as an artifact.

Updates `mlfad.model` with `:run_name`, `:run_id`, `:autoad` and
`:bestmodel_uri`. Returns `nothing`.

If any step fails, the run is ended with status `"FAILED"` and the exception is
rethrown, so that no run is left active.
"""
function fit!(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame, Y::Vector)
    # generate run name
    run_name = mlfad.model[:name] * "_" * "fit" * "_" * randstring(3)
    mlfad.model[:run_name] = run_name
    MLF.set_experiment(mlfad.model[:name])
    MLF.start_run(run_name=run_name)
    try
        # get run_id
        run = MLF.active_run()
        mlfad.model[:run_id] = run.info.run_id
        # automate anomaly detection
        votepercent = mlfad.model[:votepercent]
        autoad = AutoAnomalyDetection(Dict(:votepercent => votepercent))
        adoutput = fit_transform!(autoad, X, Y)
        MLF.log_param("ADOutput", adoutput)
        MLF.log_metric("votepercent", autoad.model[:votepercent])
        # save model in mlflow via a file in the temporary directory
        artifact_name = mlfad.model[:artifact_name]
        artifact_location = joinpath(tempdir(), artifact_name)
        serialize(artifact_location, autoad)
        MLF.log_artifact(artifact_location)
        # save model in memory
        mlfad.model[:autoad] = autoad
        # save model uri location
        mlfad.model[:bestmodel_uri] = MLF.get_artifact_uri(artifact_path=artifact_name)
    catch
        # do not leave the run active when a step above throws
        MLF.end_run(status="FAILED")
        rethrow()
    end
    MLF.end_run()
    return nothing
end
115+
116+
"""
    fit(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame, Y::Vector)

Non-mutating variant of `fit!`: fit a deep copy of `mlfad` on `X` and `Y` and
return that copy, leaving `mlfad` itself untouched.
"""
function fit(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame, Y::Vector)
    fitted = deepcopy(mlfad)
    fit!(fitted, X, Y)
    return fitted
end
121+
122+
"""
    transform!(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame)

Apply the anomaly detector stored as an MLflow artifact to `X`.

The artifact `mlfad.model[:artifact_name]` of run `mlfad.model[:run_id]` is
downloaded and deserialized, then applied to `X` with `transform!` inside a new
run named `<name>_transform_<3 random chars>`. The result is logged as the
`output` param and returned.

# Throws
- `ErrorException` when the configured artifact is not listed for the run, or
  the artifact listing itself fails.
- Any exception raised while downloading, deserializing or applying the model is
  rethrown after the run has been ended with status `"FAILED"`.
"""
function transform!(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame)
    # end whatever run is currently active before starting a new one
    MLF.end_run()
    run_id = mlfad.model[:run_id]
    artifact_name = mlfad.model[:artifact_name]

    # Look for the configured artifact name among all artifacts of the run,
    # instead of comparing only the first entry against a fixed file name.
    artifact_found = try
        model_artifacts = MLF.artifacts.list_artifacts(run_id=run_id)
        any(string(artifact.path) == artifact_name for artifact in model_artifacts)
    catch e
        @info e
        false
    end
    artifact_found ||
        error("Artifact $artifact_name does not exist in run_id = $run_id")

    run_name = mlfad.model[:name] * "_" * "transform" * "_" * randstring(3)
    mlfad.model[:run_name] = run_name
    MLF.set_experiment(mlfad.model[:name])
    MLF.start_run(run_name=run_name)
    Y = try
        # download model artifact
        pylocalpath = MLF.artifacts.download_artifacts(
            run_id=run_id, artifact_path=artifact_name
        )
        # NOTE(review): the downloaded file is deserialized as-is, so the
        # tracking server must be trusted.
        bestmodel = deserialize(string(pylocalpath))
        output = transform!(bestmodel, X)
        MLF.log_param("output", output)
        output
    catch
        # do not leave the run active when a step above throws
        MLF.end_run(status="FAILED")
        rethrow()
    end
    MLF.end_run()
    return Y
end
147+
148+
"""
    transform(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame)

Non-mutating variant of `transform!`: run `transform!` on a deep copy of `mlfad`
and return its output, so `mlfad.model` is not modified.
"""
function transform(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame)
    workflow_copy = deepcopy(mlfad)
    output = transform!(workflow_copy, X)
    return output
end
152+
153+
"""
    mlfaddriver()

Demo driver for `AutoMLFlowAnomalyDetection`. Requires the MLflow server at the
default `:url` to be reachable.

1. Fit and transform a synthetic single-column signal with the default settings.
2. Build a fresh workflow, point it at the run id from step 1 and transform
   again, which exercises loading the stored artifact.
3. Fit and transform with `:votepercent => 0.5`.

The first five rows of each output are printed. Returns `nothing`.
"""
function mlfaddriver()
    X = vcat(5 * cos.(-10:10), sin.(-30:30), 3 * cos.(-10:10), 2 * tan.(-10:10), sin.(-30:30)) |> x -> DataFrame([x], :auto)

    mlfad = AutoMLFlowAnomalyDetection()
    Yc = fit_transform!(mlfad, X)
    println(Yc |> x -> first(x, 5))

    # test prediction using existing trained model from artifacts
    run_id = mlfad.model[:run_id]
    newmlad = AutoMLFlowAnomalyDetection()
    newmlad(; run_id=run_id)
    Yn = transform!(newmlad, X)
    # show the output of the reloaded model, not the earlier fit output
    println(Yn |> x -> first(x, 5))

    mlvad = AutoMLFlowAnomalyDetection(Dict(:votepercent => 0.5))
    Yc = fit_transform!(mlvad, X)
    println(Yc |> x -> first(x, 5))

    return nothing
end
175+
176+
end

0 commit comments

Comments
 (0)