
Commit 3011477

Merge pull request #133 from IBM/mlflow
MLflow for autoregression and autoclassification
2 parents 1a53003 + 103c58f commit 3011477

12 files changed: +527 -383 lines changed

AutoAI/CondaPkg.toml

Lines changed: 2 additions & 0 deletions
@@ -1,2 +1,4 @@
 [deps]
+mlflow = ""
 pycaret = ""
+requests = ""
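
The two new entries declare the Python packages mlflow and requests, which CondaPkg.jl resolves into the project's Conda environment (an empty version string leaves the version unconstrained). A minimal sketch, assuming PythonCall is already a Julia dependency of AutoAI, of how such Conda-managed packages are then reached from Julia; it mirrors the pynew/pycopy! pattern used by the new modules in this commit, and the module name PyDeps is illustrative only.

module PyDeps

import PythonCall
const PYC = PythonCall

# placeholders created at precompile time, filled in when the module loads
const MLF = PYC.pynew()
const REQ = PYC.pynew()

function __init__()
    # import the Conda-installed packages declared in CondaPkg.toml
    PYC.pycopy!(MLF, PYC.pyimport("mlflow"))
    PYC.pycopy!(REQ, PYC.pyimport("requests"))
end

end # module PyDeps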

AutoAI/src/AutoAI.jl

Lines changed: 12 additions & 1 deletion
@@ -32,5 +32,16 @@ include("carettspredictor.jl")
 using .CaretTSPredictors
 export CaretTSPredictor, carettsdriver

-greet() = print("Hello World!")
+include("automlflowclassification.jl")
+using .AutoMLFlowClassifications
+export mlfcldriver
+
+include("automlflowregression.jl")
+using .AutoMLFlowRegressions
+export mlfregdriver
+
+include("autoanomalydetector.jl")
+using .AutoAnomalyDetectors
+export autoaddriver
+
 end # module AutoAI
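
With these exports in place, the new workflows can be exercised straight from the package's top level. A minimal REPL sketch, assuming AutoAI is installed and, for the MLflow drivers, a tracking server is reachable at the default http://localhost:8080; the file for mlfregdriver is not shown in this diff, so only its regression role is assumed here.

using AutoAI

autoaddriver()   # ensemble anomaly detection on a synthetic series (defined below)
mlfcldriver()    # AutoML classification on iris with runs logged to MLflow (defined below)
mlfregdriver()   # regression counterpart defined in automlflowregression.jl (not shown in this excerpt)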

AutoAI/src/autoanomalydetector.jl

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+module AutoAnomalyDetectors
+using Distributed
+using AutoMLPipeline
+using DataFrames: DataFrame, nrow, rename!
+using AutoMLPipeline: score
+using Random
+using Statistics
+using ..AbsTypes
+using ..Utils
+using ..CaretAnomalyDetectors
+import ..CaretAnomalyDetectors.caretadlearner_dict
+using ..SKAnomalyDetectors
+
+import ..AbsTypes: fit, fit!, transform, transform!
+export fit, fit!, transform, transform!
+export AutoAnomalyDetector, autoaddriver
+
+# define customized type
+mutable struct AutoAnomalyDetector <: Workflow
+    name::String
+    model::Dict{Symbol,Any}
+
+    function AutoAnomalyDetector(args=Dict())
+        default_args = Dict(
+            :name => "autoad",
+            :votepercent => 0.0, # output all votepercent if 0.0, otherwise get specific votepercent
+            :impl_args => Dict()
+        )
+        cargs = nested_dict_merge(default_args, args)
+        cargs[:name] = cargs[:name] * "_" * randstring(3)
+        new(cargs[:name], cargs)
+    end
+end
+
+function fit!(autodt::AutoAnomalyDetector, X::DataFrame, Y::Vector)
+    return nothing
+end
+
+function fit(clfb::AutoAnomalyDetector, X::DataFrame, Y::Vector)
+    return nothing
+end
+
+function transform!(autodt::AutoAnomalyDetector, X::DataFrame)
+    # detect anomalies using caret
+    dfres1 = DataFrame()
+    for learner in keys(caretadlearner_dict)
+        model = CaretAnomalyDetector(learner)
+        res = fit_transform!(model, X)
+        mname = string(learner)
+        dfres1 = hcat(dfres1, DataFrame(mname => res; makeunique=true))
+    end
+
+    # detect anomalies using scikitlearn
+    iso = SKAnomalyDetector("IsolationForest")
+    eli = SKAnomalyDetector("EllipticEnvelope")
+    osvm = SKAnomalyDetector("OneClassSVM")
+    lcl = SKAnomalyDetector("LocalOutlierFactor")
+    isores = fit_transform!(iso, X)
+    elires = fit_transform!(eli, X)
+    osvmres = fit_transform!(osvm, X)
+    lclres = fit_transform!(lcl, X)
+    dfres2 = DataFrame(iso=isores, eli=elires, osvm=osvmres, lcl=lclres)
+
+    # combine results and get mean anomaly for each row
+    mdf = hcat(dfres1, dfres2)
+    mdfm = hcat(mdf, DataFrame(admean=mean.(eachrow(mdf))))
+    # filter anomalies based on mean cut-off
+    votepercent = autodt.model[:votepercent]
+    if votepercent == 0.0
+        dfad = @distributed (hcat) for cutoff in 0.1:0.1:1.0
+            ndx = map(x -> x >= cutoff, mdfm.admean)
+            n = string(cutoff)
+            DataFrame(n => ndx)
+        end
+        return dfad
+    else
+        ndx = map(x -> x >= votepercent, mdfm.admean)
+        n = string(votepercent)
+        dfad = DataFrame(n => ndx)
+        return dfad
+    end
+end
+
+function transform(autodt::AutoAnomalyDetector, X::DataFrame)
+end
+
+function autoaddriver()
+    autoaddt = AutoAnomalyDetector(Dict(:votepercent => 0.0))
+    X = vcat(5 * cos.(-10:10), sin.(-30:30), 3 * cos.(-10:10), 2 * tan.(-10:10), sin.(-30:30)) |> x -> DataFrame([x], :auto)
+    fit_transform!(autoaddt, X)
+end
+
+
+end
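
Beyond the bundled autoaddriver(), the detector can be given a single vote cut-off. A short sketch, assuming fit_transform! comes from AutoMLPipeline as it does inside the module and that the type is reached through the AutoAI.AutoAnomalyDetectors submodule (only autoaddriver is re-exported by AutoAI in this diff); the 0.5 cut-off and the toy series are illustrative.

using AutoAI, AutoMLPipeline, DataFrames
import AutoAI.AutoAnomalyDetectors: AutoAnomalyDetector

# toy one-column series similar to the one built in autoaddriver()
X = DataFrame([vcat(5 * cos.(-10:10), sin.(-30:30), 2 * tan.(-10:10))], :auto)

# a nonzero :votepercent skips the 0.1:0.1:1.0 sweep and returns a single column
detector = AutoAnomalyDetector(Dict(:votepercent => 0.5))
flags = fit_transform!(detector, X)   # Bool column "0.5": rows whose mean anomaly vote is >= 0.5
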
AutoAI/src/automlflowclassification.jl

Lines changed: 168 additions & 0 deletions
@@ -0,0 +1,168 @@
+module AutoMLFlowClassifications
+using Statistics
+using Serialization
+import PythonCall
+const PYC = PythonCall
+
+using DataFrames: DataFrame
+using Random
+using ..AbsTypes
+using ..Utils
+using ..AutoClassifications
+using ..AutoMLPipeline: getiris
+
+import ..AbsTypes: fit, fit!, transform, transform!
+export fit, fit!, transform, transform!
+export mlfcldriver, AutoMLFlowClassification
+
+const MLF = PYC.pynew()
+const REQ = PYC.pynew()
+
+function __init__()
+    PYC.pycopy!(MLF, PYC.pyimport("mlflow"))
+    PYC.pycopy!(REQ, PYC.pyimport("requests"))
+end
+
+mutable struct AutoMLFlowClassification <: Workflow
+    name::String
+    model::Dict{Symbol,Any}
+
+    function AutoMLFlowClassification(args=Dict())
+        default_args = Dict(
+            :name => "AutoMLClassification",
+            :projectname => "AutoMLClassification",
+            :url => "http://localhost:8080",
+            :description => "Automated Classification",
+            :projecttype => "classification",
+            :artifact_name => "autoclass.bin",
+            :impl_args => Dict()
+        )
+        cargs = nested_dict_merge(default_args, args)
+        #cargs[:name] = cargs[:name] * "_" * randstring(3)
+        experiment_tags = Dict(
+            "projectname" => cargs[:projectname],
+            "projecttype" => cargs[:projecttype],
+            "notes" => cargs[:description]
+        )
+        # check if mlflow server exists
+        try
+            httpget = getproperty(REQ, "get")
+            res = httpget(cargs[:url] * "/health")
+        catch
+            @error("Mlflow Server Unreachable")
+            exit(1)
+        end
+        MLF.set_tracking_uri(uri=cargs[:url])
+        name = cargs[:name]
+        experiment = MLF.search_experiments(filter_string="name = \'$name\'")
+        if PYC.pylen(experiment) != 0
+            MLF.set_experiment(experiment[0].name)
+        else
+            theexperiment = MLF.create_experiment(name=name, tags=experiment_tags)
+            cargs[:experiment_id] = theexperiment
+        end
+        new(cargs[:name], cargs)
+    end
+end
+
+function AutoMLFlowClassification(name::String, args::Dict)
+    AutoMLFlowClassification(Dict(:name => name, args...))
+end
+
+function AutoMLFlowClassification(name::String; args...)
+    AutoMLFlowClassification(Dict(Dict(pairs(args))...))
+end
+
+function (obj::AutoMLFlowClassification)(; args...)
+    model = obj.model
+    cargs = nested_dict_merge(model, Dict(pairs(args)))
+    obj.model = cargs
+    return obj
+end
+
+function fit!(mlfcl::AutoMLFlowClassification, X::DataFrame, Y::Vector)
+    # end any running experiment
+    # MLF.end_run()
+    # generate run name
+    run_name = mlfcl.model[:name] * "_" * "fit" * "_" * randstring(3)
+    mlfcl.model[:run_name] = run_name
+    MLF.set_experiment(mlfcl.model[:name])
+    MLF.start_run(run_name=run_name)
+    # get run_id
+    run = MLF.active_run()
+    mlfcl.model[:run_id] = run.info.run_id
+    # automate classification
+    autoclass = AutoClassification()
+    fit_transform!(autoclass, X, Y)
+    bestmodel = autoclass.model[:bestpipeline].model[:description]
+    MLF.log_param("bestmodel", bestmodel)
+    MLF.log_param("pipelines", autoclass.model[:dfpipelines].Description)
+    MLF.log_metric("bestperformance", autoclass.model[:performance].mean[1])
+    # save model in mlflow
+    artifact_name = mlfcl.model[:artifact_name]
+    # use temporary directory
+    tmpdir = tempdir()
+    artifact_location = joinpath(tmpdir, artifact_name)
+    serialize(artifact_location, autoclass)
+    MLF.log_artifact(artifact_location)
+    # save model in memory
+    mlfcl.model[:autoclass] = autoclass
+    bestmodel_uri = MLF.get_artifact_uri(artifact_path=artifact_name)
+    # save model uri location
+    mlfcl.model[:bestmodel_uri] = bestmodel_uri
+    MLF.end_run()
+end
+
+function fit(mlfcl::AutoMLFlowClassification, X::DataFrame, Y::Vector)
+    mlfcopy = deepcopy(mlfcl)
+    fit!(mlfcopy, X, Y)
+    return mlfcopy
+end
+
+function transform!(mlfcl::AutoMLFlowClassification, X::DataFrame)
+    MLF.end_run()
+    # download model artifact
+    run_id = mlfcl.model[:run_id]
+    artifact_name = mlfcl.model[:artifact_name]
+
+    try
+        model_artifacts = MLF.artifacts.list_artifacts(run_id=run_id)
+        @assert model_artifacts[0].path |> string == "autoclass.bin"
+    catch e
+        @info e
+        throw("Artifact $artifact_name does not exist in run_id = $run_id")
+    end
+
+    run_name = mlfcl.model[:name] * "_" * "transform" * "_" * randstring(3)
+    mlfcl.model[:run_name] = run_name
+    MLF.set_experiment(mlfcl.model[:name])
+    MLF.start_run(run_name=run_name)
+    pylocalpath = MLF.artifacts.download_artifacts(run_id=run_id, artifact_path=artifact_name)
+    bestmodel = deserialize(string(pylocalpath))
+    Y = transform!(bestmodel, X)
+    MLF.log_param("output", Y)
+    MLF.end_run()
+    return Y
+end
+
+function mlfcldriver()
+    df = getiris()
+    X = df[:, 1:end-1]
+    Y = df[:, end] |> collect
+
+    mlfclass = AutoMLFlowClassification()
+    Yc = fit_transform!(mlfclass, X, Y)
+    println("accuracy = ", mean(Y .== Yc))
+
+    # test prediction using existing trained model from artifacts
+    run_id = mlfclass.model[:run_id]
+    newmfclass = AutoMLFlowClassification(Dict(:run_id => run_id))
+    newmfclass = AutoMLFlowClassification()
+    newmfclass(; run_id=run_id)
+    Yn = transform!(newmfclass, X)
+    println("accuracy = ", mean(Yn .== Y))
+
+    return nothing
+end
+
+end
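
A short usage sketch outside mlfcldriver(), assuming an MLflow tracking server is already running at the default address (for example started with: mlflow server --host 127.0.0.1 --port 8080), that fit_transform! and transform! are available from AutoMLPipeline as they are inside the module, and that the type is reached through the AutoAI.AutoMLFlowClassifications submodule; the toy dataset, experiment name, and labels are illustrative.

using AutoAI, AutoMLPipeline, DataFrames
import AutoAI.AutoMLFlowClassifications: AutoMLFlowClassification

# toy two-class data; any feature DataFrame plus a label vector works
X = DataFrame(x1=randn(100), x2=randn(100))
Y = map(v -> v > 0 ? "pos" : "neg", X.x1 .+ X.x2)

# train: logs the bestmodel/pipelines params, the bestperformance metric,
# and the serialized pipeline as the autoclass.bin artifact
mlfclass = AutoMLFlowClassification(Dict(:name => "demo_classification",
                                         :url => "http://localhost:8080"))
Yhat = fit_transform!(mlfclass, X, Y)

# later: rebuild a workflow pointing at the same experiment and run, and predict
# from the pipeline stored in the MLflow artifact store
run_id = mlfclass.model[:run_id]
reloaded = AutoMLFlowClassification(Dict(:name => "demo_classification", :run_id => run_id))
Ynew = transform!(reloaded, X)

As in mlfcldriver(), the run_id is all that is needed to reuse a trained workflow: transform! downloads the autoclass.bin artifact for that run, deserializes the pipeline, and logs the predictions to a fresh run.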
