Skip to content

Commit 03bfa0a

Browse files
authored
Merge pull request #135 from IBM/refactor_mlflow_functions
refactor common functions
2 parents 8aa4c9e + 0b83ce8 commit 03bfa0a

File tree

4 files changed

+106
-208
lines changed

4 files changed

+106
-208
lines changed

AutoAI/src/automlflowanomalydetection.jl

Lines changed: 10 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@ function __init__()
2323
PYC.pycopy!(REQ, PYC.pyimport("requests"))
2424
end
2525

26+
include("./mlflowutils.jl")
27+
2628
mutable struct AutoMLFlowAnomalyDetection <: Workflow
2729
name::String
2830
model::Dict{Symbol,Any}
@@ -39,29 +41,7 @@ mutable struct AutoMLFlowAnomalyDetection <: Workflow
3941
:impl_args => Dict()
4042
)
4143
cargs = nested_dict_merge(default_args, args)
42-
#cargs[:name] = cargs[:name] * "_" * randstring(3)
43-
experiment_tags = Dict(
44-
"projectname" => cargs[:projectname],
45-
"projecttype" => cargs[:projecttype],
46-
"notes" => cargs[:description]
47-
)
48-
# check if mlflow server exists
49-
try
50-
httpget = getproperty(REQ, "get")
51-
res = httpget(cargs[:url] * "/health")
52-
catch
53-
@error("Mlflow Server Unreachable")
54-
exit(1)
55-
end
56-
MLF.set_tracking_uri(uri=cargs[:url])
57-
name = cargs[:name]
58-
experiment = MLF.search_experiments(filter_string="name = \'$name\'")
59-
if PYC.pylen(experiment) != 0
60-
MLF.set_experiment(experiment[0].name)
61-
else
62-
theexperiment = MLF.create_experiment(name=name, tags=experiment_tags)
63-
cargs[:experiment_id] = theexperiment
64-
end
44+
initmlflowcargs!(cargs)
6545
new(cargs[:name], cargs)
6646
end
6747
end
@@ -82,35 +62,18 @@ function (obj::AutoMLFlowAnomalyDetection)(; args...)
8262
end
8363

8464
function fit!(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame, Y::Vector)
85-
# end any running experiment
86-
# MLF.end_run()
87-
# generate run name
88-
run_name = mlfad.model[:name] * "_" * "fit" * "_" * randstring(3)
89-
mlfad.model[:run_name] = run_name
90-
MLF.set_experiment(mlfad.model[:name])
91-
MLF.start_run(run_name=run_name)
92-
# get run_id
93-
run = MLF.active_run()
94-
mlfad.model[:run_id] = run.info.run_id
65+
setupautofit!(mlfad)
9566
# automate anomaly detection
9667
votepercent = mlfad.model[:votepercent]
9768
autoad = AutoAnomalyDetection(Dict(:votepercent => votepercent))
9869
adoutput = fit_transform!(autoad, X, Y)
70+
# save model in memory
71+
mlfad.model[:automodel] = autoad
72+
# log info to mlflow
9973
MLF.log_param("ADOutput", adoutput)
10074
MLF.log_metric("votepercent", autoad.model[:votepercent])
101-
# save model in mlflow
102-
artifact_name = mlfad.model[:artifact_name]
103-
# use temporary directory
104-
tmpdir = tempdir()
105-
artifact_location = joinpath(tmpdir, artifact_name)
106-
serialize(artifact_location, autoad)
107-
MLF.log_artifact(artifact_location)
108-
# save model in memory
109-
mlfad.model[:autoad] = autoad
110-
bestmodel_uri = MLF.get_artifact_uri(artifact_path=artifact_name)
111-
# save model uri location
112-
mlfad.model[:bestmodel_uri] = bestmodel_uri
113-
MLF.end_run()
75+
# log artifacts, end experiment run
76+
logmlartifact(mlfad)
11477
end
11578

11679
function fit(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame, Y::Vector)
@@ -120,29 +83,7 @@ function fit(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame, Y::Vector)
12083
end
12184

12285
function transform!(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame)
123-
MLF.end_run()
124-
# download model artifact
125-
run_id = mlfad.model[:run_id]
126-
artifact_name = mlfad.model[:artifact_name]
127-
128-
try
129-
model_artifacts = MLF.artifacts.list_artifacts(run_id=run_id)
130-
@assert model_artifacts[0].path |> string == "autoad.bin"
131-
catch e
132-
@info e
133-
throw("Artifact $artifact_name does not exist in run_id = $run_id")
134-
end
135-
136-
run_name = mlfad.model[:name] * "_" * "transform" * "_" * randstring(3)
137-
mlfad.model[:run_name] = run_name
138-
MLF.set_experiment(mlfad.model[:name])
139-
MLF.start_run(run_name=run_name)
140-
pylocalpath = MLF.artifacts.download_artifacts(run_id=run_id, artifact_path=artifact_name)
141-
bestmodel = deserialize(string(pylocalpath))
142-
Y = transform!(bestmodel, X)
143-
MLF.log_param("output", Y)
144-
MLF.end_run()
145-
return Y
86+
return autotransform!(mlfad, X)
14687
end
14788

14889
function transform(mlfad::AutoMLFlowAnomalyDetection, X::DataFrame)

AutoAI/src/automlflowclassification.jl

Lines changed: 11 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@ function __init__()
2323
PYC.pycopy!(REQ, PYC.pyimport("requests"))
2424
end
2525

26+
include("./mlflowutils.jl")
27+
2628
mutable struct AutoMLFlowClassification <: Workflow
2729
name::String
2830
model::Dict{Symbol,Any}
@@ -38,29 +40,7 @@ mutable struct AutoMLFlowClassification <: Workflow
3840
:impl_args => Dict()
3941
)
4042
cargs = nested_dict_merge(default_args, args)
41-
#cargs[:name] = cargs[:name] * "_" * randstring(3)
42-
experiment_tags = Dict(
43-
"projectname" => cargs[:projectname],
44-
"projecttype" => cargs[:projecttype],
45-
"notes" => cargs[:description]
46-
)
47-
# check if mlflow server exists
48-
try
49-
httpget = getproperty(REQ, "get")
50-
res = httpget(cargs[:url] * "/health")
51-
catch
52-
@error("Mlflow Server Unreachable")
53-
exit(1)
54-
end
55-
MLF.set_tracking_uri(uri=cargs[:url])
56-
name = cargs[:name]
57-
experiment = MLF.search_experiments(filter_string="name = \'$name\'")
58-
if PYC.pylen(experiment) != 0
59-
MLF.set_experiment(experiment[0].name)
60-
else
61-
theexperiment = MLF.create_experiment(name=name, tags=experiment_tags)
62-
cargs[:experiment_id] = theexperiment
63-
end
43+
initmlflowcargs!(cargs)
6444
new(cargs[:name], cargs)
6545
end
6646
end
@@ -81,36 +61,20 @@ function (obj::AutoMLFlowClassification)(; args...)
8161
end
8262

8363
function fit!(mlfcl::AutoMLFlowClassification, X::DataFrame, Y::Vector)
84-
# end any running experiment
85-
# MLF.end_run()
86-
# generate run name
87-
run_name = mlfcl.model[:name] * "_" * "fit" * "_" * randstring(3)
88-
mlfcl.model[:run_name] = run_name
89-
MLF.set_experiment(mlfcl.model[:name])
90-
MLF.start_run(run_name=run_name)
91-
# get run_id
92-
run = MLF.active_run()
93-
mlfcl.model[:run_id] = run.info.run_id
64+
# start experiment run
65+
setupautofit!(mlfcl)
9466
# automate classification
9567
autoclass = AutoClassification()
9668
fit_transform!(autoclass, X, Y)
69+
# save model in memory
70+
mlfcl.model[:automodel] = autoclass
71+
# log info to mlflow
9772
bestmodel = autoclass.model[:bestpipeline].model[:description]
9873
MLF.log_param("bestmodel", bestmodel)
9974
MLF.log_param("pipelines", autoclass.model[:dfpipelines].Description)
10075
MLF.log_metric("bestperformance", autoclass.model[:performance].mean[1])
101-
# save model in mlflow
102-
artifact_name = mlfcl.model[:artifact_name]
103-
# use temporary directory
104-
tmpdir = tempdir()
105-
artifact_location = joinpath(tmpdir, artifact_name)
106-
serialize(artifact_location, autoclass)
107-
MLF.log_artifact(artifact_location)
108-
# save model in memory
109-
mlfcl.model[:autoclass] = autoclass
110-
bestmodel_uri = MLF.get_artifact_uri(artifact_path=artifact_name)
111-
# save model uri location
112-
mlfcl.model[:bestmodel_uri] = bestmodel_uri
113-
MLF.end_run()
76+
# log artifacts, end experiment run
77+
logmlartifact(mlfcl)
11478
end
11579

11680
function fit(mlfcl::AutoMLFlowClassification, X::DataFrame, Y::Vector)
@@ -120,29 +84,7 @@ function fit(mlfcl::AutoMLFlowClassification, X::DataFrame, Y::Vector)
12084
end
12185

12286
function transform!(mlfcl::AutoMLFlowClassification, X::DataFrame)
123-
MLF.end_run()
124-
# download model artifact
125-
run_id = mlfcl.model[:run_id]
126-
artifact_name = mlfcl.model[:artifact_name]
127-
128-
try
129-
model_artifacts = MLF.artifacts.list_artifacts(run_id=run_id)
130-
@assert model_artifacts[0].path |> string == "autoclass.bin"
131-
catch e
132-
@info e
133-
throw("Artifact $artifact_name does not exist in run_id = $run_id")
134-
end
135-
136-
run_name = mlfcl.model[:name] * "_" * "transform" * "_" * randstring(3)
137-
mlfcl.model[:run_name] = run_name
138-
MLF.set_experiment(mlfcl.model[:name])
139-
MLF.start_run(run_name=run_name)
140-
pylocalpath = MLF.artifacts.download_artifacts(run_id=run_id, artifact_path=artifact_name)
141-
bestmodel = deserialize(string(pylocalpath))
142-
Y = transform!(bestmodel, X)
143-
MLF.log_param("output", Y)
144-
MLF.end_run()
145-
return Y
87+
return autotransform!(mlfcl, X)
14688
end
14789

14890
function mlfcldriver()

AutoAI/src/automlflowregression.jl

Lines changed: 10 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@ function __init__()
2323
PYC.pycopy!(REQ, PYC.pyimport("requests"))
2424
end
2525

26+
include("./mlflowutils.jl")
27+
2628
mutable struct AutoMLFlowRegression <: Workflow
2729
name::String
2830
model::Dict{Symbol,Any}
@@ -38,29 +40,7 @@ mutable struct AutoMLFlowRegression <: Workflow
3840
:impl_args => Dict()
3941
)
4042
cargs = nested_dict_merge(default_args, args)
41-
#cargs[:name] = cargs[:name] * "_" * randstring(3)
42-
experiment_tags = Dict(
43-
"projectname" => cargs[:projectname],
44-
"projecttype" => cargs[:projecttype],
45-
"notes" => cargs[:description]
46-
)
47-
# check if mlflow server exists
48-
try
49-
httpget = getproperty(REQ, "get")
50-
res = httpget(cargs[:url] * "/health")
51-
catch
52-
@error("Mlflow Server Unreachable")
53-
exit(1)
54-
end
55-
MLF.set_tracking_uri(uri=cargs[:url])
56-
name = cargs[:name]
57-
experiment = MLF.search_experiments(filter_string="name = \'$name\'")
58-
if PYC.pylen(experiment) != 0
59-
MLF.set_experiment(experiment[0].name)
60-
else
61-
theexperiment = MLF.create_experiment(name=name, tags=experiment_tags)
62-
cargs[:experiment_id] = theexperiment
63-
end
43+
initmlflowcargs!(cargs)
6444
new(cargs[:name], cargs)
6545
end
6646
end
@@ -81,37 +61,19 @@ function (obj::AutoMLFlowRegression)(; args...)
8161
end
8262

8363
function fit!(mlfreg::AutoMLFlowRegression, X::DataFrame, Y::Vector)
84-
MLF.end_run()
85-
# end any running experiment
86-
# MLF.end_run()
87-
# generate run name
88-
run_name = mlfreg.model[:name] * "_" * "fit" * "_" * randstring(3)
89-
mlfreg.model[:run_name] = run_name
90-
MLF.set_experiment(mlfreg.model[:name])
91-
MLF.start_run(run_name=run_name)
92-
# get run_id
93-
run = MLF.active_run()
94-
mlfreg.model[:run_id] = run.info.run_id
64+
setupautofit!(mlfreg)
9565
# automate regression
9666
autoreg = AutoRegression()
9767
fit_transform!(autoreg, X, Y)
68+
# save model in memory
69+
mlfreg.model[:automodel] = autoreg
70+
# log info to mlflow
9871
bestmodel = autoreg.model[:bestpipeline].model[:description]
9972
MLF.log_param("bestmodel", bestmodel)
10073
MLF.log_param("pipelines", autoreg.model[:dfpipelines].Description)
10174
MLF.log_metric("bestperformance", autoreg.model[:performance].mean[1])
102-
# save model in mlflow
103-
artifact_name = mlfreg.model[:artifact_name]
104-
# use temporary directory
105-
tmpdir = tempdir()
106-
artifact_location = joinpath(tmpdir, artifact_name)
107-
serialize(artifact_location, autoreg)
108-
MLF.log_artifact(artifact_location)
109-
# save model in memory
110-
mlfreg.model[:autoreg] = autoreg
111-
bestmodel_uri = MLF.get_artifact_uri(artifact_path=artifact_name)
112-
# save model uri location
113-
mlfreg.model[:bestmodel_uri] = bestmodel_uri
114-
MLF.end_run()
75+
# log artifacts, end experiment run
76+
logmlartifact(mlfreg)
11577
end
11678

11779
function fit(mlfreg::AutoMLFlowRegression, X::DataFrame, Y::Vector)
@@ -121,29 +83,7 @@ function fit(mlfreg::AutoMLFlowRegression, X::DataFrame, Y::Vector)
12183
end
12284

12385
function transform!(mlfreg::AutoMLFlowRegression, X::DataFrame)
124-
MLF.end_run()
125-
# download model artifact
126-
run_id = mlfreg.model[:run_id]
127-
artifact_name = mlfreg.model[:artifact_name]
128-
129-
try
130-
model_artifacts = MLF.artifacts.list_artifacts(run_id=run_id)
131-
@assert model_artifacts[0].path |> string == "autoreg.bin"
132-
catch e
133-
@info e
134-
throw("Artifact $artifact_name does not exist in run_id = $run_id")
135-
end
136-
137-
run_name = mlfreg.model[:name] * "_" * "transform" * "_" * randstring(3)
138-
mlfreg.model[:run_name] = run_name
139-
MLF.set_experiment(mlfreg.model[:name])
140-
MLF.start_run(run_name=run_name)
141-
pylocalpath = MLF.artifacts.download_artifacts(run_id=run_id, artifact_path=artifact_name)
142-
bestmodel = deserialize(string(pylocalpath))
143-
Y = transform!(bestmodel, X)
144-
MLF.log_param("output", Y)
145-
MLF.end_run()
146-
return Y
86+
return autotransform!(mlfreg, X)
14787
end
14888

14989
function mlfregdriver()

0 commit comments

Comments
 (0)