
Commit 3011477

Merge pull request #133 from IBM/mlflow
MLflow for autoregression and autoclassification
2 parents 1a53003 + 103c58f commit 3011477

12 files changed: +527 -383 lines changed

AutoAI/CondaPkg.toml

Lines changed: 2 additions & 0 deletions
@@ -1,2 +1,4 @@
 [deps]
+mlflow = ""
 pycaret = ""
+requests = ""
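
The two new entries declare the Python packages mlflow and requests, which CondaPkg.jl resolves into the project's Conda environment (an empty version string leaves the version unconstrained). A minimal sketch, assuming PythonCall is already a Julia dependency of AutoAI, of how such Conda-managed packages are then reached from Julia; it mirrors the pynew/pycopy! pattern used by the new modules in this commit, and the module name PyDeps is illustrative only.

module PyDeps

import PythonCall
const PYC = PythonCall

# placeholders created at precompile time, filled in when the module loads
const MLF = PYC.pynew()
const REQ = PYC.pynew()

function __init__()
    # import the Conda-installed packages declared in CondaPkg.toml
    PYC.pycopy!(MLF, PYC.pyimport("mlflow"))
    PYC.pycopy!(REQ, PYC.pyimport("requests"))
end

end # module PyDeps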

AutoAI/src/AutoAI.jl

Lines changed: 12 additions & 1 deletion
@@ -32,5 +32,16 @@ include("carettspredictor.jl")
 using .CaretTSPredictors
 export CaretTSPredictor, carettsdriver

-greet() = print("Hello World!")
+include("automlflowclassification.jl")
+using .AutoMLFlowClassifications
+export mlfcldriver
+
+include("automlflowregression.jl")
+using .AutoMLFlowRegressions
+export mlfregdriver
+
+include("autoanomalydetector.jl")
+using .AutoAnomalyDetectors
+export autoaddriver
+
 end # module AutoAI
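
With these exports in place, the new workflows can be exercised straight from the package's top level. A minimal REPL sketch, assuming AutoAI is installed and, for the MLflow drivers, a tracking server is reachable at the default http://localhost:8080; the file for mlfregdriver is not shown in this diff, so only its regression role is assumed here.

using AutoAI

autoaddriver()   # ensemble anomaly detection on a synthetic series (defined below)
mlfcldriver()    # AutoML classification on iris with runs logged to MLflow (defined below)
mlfregdriver()   # regression counterpart defined in automlflowregression.jl (not shown in this excerpt)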

AutoAI/src/autoanomalydetector.jl

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
+module AutoAnomalyDetectors
+using Distributed
+using AutoMLPipeline
+using DataFrames: DataFrame, nrow, rename!
+using AutoMLPipeline: score
+using Random
+using Statistics
+using ..AbsTypes
+using ..Utils
+using ..CaretAnomalyDetectors
+import ..CaretAnomalyDetectors.caretadlearner_dict
+using ..SKAnomalyDetectors
+
+import ..AbsTypes: fit, fit!, transform, transform!
+export fit, fit!, transform, transform!
+export AutoAnomalyDetector, autoaddriver
+
+# define customized type
+mutable struct AutoAnomalyDetector <: Workflow
+    name::String
+    model::Dict{Symbol,Any}
+
+    function AutoAnomalyDetector(args=Dict())
+        default_args = Dict(
+            :name => "autoad",
+            :votepercent => 0.0, # output all votepercent if 0.0, otherwise get specific votepercent
+            :impl_args => Dict()
+        )
+        cargs = nested_dict_merge(default_args, args)
+        cargs[:name] = cargs[:name] * "_" * randstring(3)
+        new(cargs[:name], cargs)
+    end
+end
+
+function fit!(autodt::AutoAnomalyDetector, X::DataFrame, Y::Vector)
+    return nothing
+end
+
+function fit(clfb::AutoAnomalyDetector, X::DataFrame, Y::Vector)
+    return nothing
+end
+
+function transform!(autodt::AutoAnomalyDetector, X::DataFrame)
+    # detect anomalies using caret
+    dfres1 = DataFrame()
+    for learner in keys(caretadlearner_dict)
+        model = CaretAnomalyDetector(learner)
+        res = fit_transform!(model, X)
+        mname = string(learner)
+        dfres1 = hcat(dfres1, DataFrame(mname => res; makeunique=true))
+    end
+
+    # detect anomalies using scikitlearn
+    iso = SKAnomalyDetector("IsolationForest")
+    eli = SKAnomalyDetector("EllipticEnvelope")
+    osvm = SKAnomalyDetector("OneClassSVM")
+    lcl = SKAnomalyDetector("LocalOutlierFactor")
+    isores = fit_transform!(iso, X)
+    elires = fit_transform!(eli, X)
+    osvmres = fit_transform!(osvm, X)
+    lclres = fit_transform!(lcl, X)
+    dfres2 = DataFrame(iso=isores, eli=elires, osvm=osvmres, lcl=lclres)
+
+    # combine results and get mean anomaly for each row
+    mdf = hcat(dfres1, dfres2)
+    mdfm = hcat(mdf, DataFrame(admean=mean.(eachrow(mdf))))
+    # filter anomalies based on mean cut-off
+    votepercent = autodt.model[:votepercent]
+    if votepercent == 0.0
+        dfad = @distributed (hcat) for cutoff in 0.1:0.1:1.0
+            ndx = map(x -> x >= cutoff, mdfm.admean)
+            n = string(cutoff)
+            DataFrame(n => ndx)
+        end
+        return dfad
+    else
+        ndx = map(x -> x >= votepercent, mdfm.admean)
+        n = string(votepercent)
+        dfad = DataFrame(n => ndx)
+        return dfad
+    end
+end
+
+function transform(autodt::AutoAnomalyDetector, X::DataFrame)
+end
+
+function autoaddriver()
+    autoaddt = AutoAnomalyDetector(Dict(:votepercent => 0.0))
+    X = vcat(5 * cos.(-10:10), sin.(-30:30), 3 * cos.(-10:10), 2 * tan.(-10:10), sin.(-30:30)) |> x -> DataFrame([x], :auto)
+    fit_transform!(autoaddt, X)
+end
+
+
+end
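
Beyond the bundled autoaddriver(), the detector can be given a single vote cut-off. A short sketch, assuming fit_transform! comes from AutoMLPipeline as it does inside the module and that the type is reached through the AutoAI.AutoAnomalyDetectors submodule (only autoaddriver is re-exported by AutoAI in this diff); the 0.5 cut-off and the toy series are illustrative.

using AutoAI, AutoMLPipeline, DataFrames
import AutoAI.AutoAnomalyDetectors: AutoAnomalyDetector

# toy one-column series similar to the one built in autoaddriver()
X = DataFrame([vcat(5 * cos.(-10:10), sin.(-30:30), 2 * tan.(-10:10))], :auto)

# a nonzero :votepercent skips the 0.1:0.1:1.0 sweep and returns a single column
detector = AutoAnomalyDetector(Dict(:votepercent => 0.5))
flags = fit_transform!(detector, X)   # Bool column "0.5": rows whose mean anomaly vote is >= 0.5
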
AutoAI/src/automlflowclassification.jl

Lines changed: 168 additions & 0 deletions
@@ -0,0 +1,168 @@
+module AutoMLFlowClassifications
+using Statistics
+using Serialization
+import PythonCall
+const PYC = PythonCall
+
+using DataFrames: DataFrame
+using Random
+using ..AbsTypes
+using ..Utils
+using ..AutoClassifications
+using ..AutoMLPipeline: getiris
+
+import ..AbsTypes: fit, fit!, transform, transform!
+export fit, fit!, transform, transform!
+export mlfcldriver, AutoMLFlowClassification
+
+const MLF = PYC.pynew()
+const REQ = PYC.pynew()
+
+function __init__()
+    PYC.pycopy!(MLF, PYC.pyimport("mlflow"))
+    PYC.pycopy!(REQ, PYC.pyimport("requests"))
+end
+
+mutable struct AutoMLFlowClassification <: Workflow
+    name::String
+    model::Dict{Symbol,Any}
+
+    function AutoMLFlowClassification(args=Dict())
+        default_args = Dict(
+            :name => "AutoMLClassification",
+            :projectname => "AutoMLClassification",
+            :url => "http://localhost:8080",
+            :description => "Automated Classification",
+            :projecttype => "classification",
+            :artifact_name => "autoclass.bin",
+            :impl_args => Dict()
+        )
+        cargs = nested_dict_merge(default_args, args)
+        #cargs[:name] = cargs[:name] * "_" * randstring(3)
+        experiment_tags = Dict(
+            "projectname" => cargs[:projectname],
+            "projecttype" => cargs[:projecttype],
+            "notes" => cargs[:description]
+        )
+        # check if mlflow server exists
+        try
+            httpget = getproperty(REQ, "get")
+            res = httpget(cargs[:url] * "/health")
+        catch
+            @error("Mlflow Server Unreachable")
+            exit(1)
+        end
+        MLF.set_tracking_uri(uri=cargs[:url])
+        name = cargs[:name]
+        experiment = MLF.search_experiments(filter_string="name = \'$name\'")
+        if PYC.pylen(experiment) != 0
+            MLF.set_experiment(experiment[0].name)
+        else
+            theexperiment = MLF.create_experiment(name=name, tags=experiment_tags)
+            cargs[:experiment_id] = theexperiment
+        end
+        new(cargs[:name], cargs)
+    end
+end
+
+function AutoMLFlowClassification(name::String, args::Dict)
+    AutoMLFlowClassification(Dict(:name => name, args...))
+end
+
+function AutoMLFlowClassification(name::String; args...)
+    AutoMLFlowClassification(Dict(Dict(pairs(args))...))
+end
+
+function (obj::AutoMLFlowClassification)(; args...)
+    model = obj.model
+    cargs = nested_dict_merge(model, Dict(pairs(args)))
+    obj.model = cargs
+    return obj
+end
+
+function fit!(mlfcl::AutoMLFlowClassification, X::DataFrame, Y::Vector)
+    # end any running experiment
+    # MLF.end_run()
+    # generate run name
+    run_name = mlfcl.model[:name] * "_" * "fit" * "_" * randstring(3)
+    mlfcl.model[:run_name] = run_name
+    MLF.set_experiment(mlfcl.model[:name])
+    MLF.start_run(run_name=run_name)
+    # get run_id
+    run = MLF.active_run()
+    mlfcl.model[:run_id] = run.info.run_id
+    # automate classification
+    autoclass = AutoClassification()
+    fit_transform!(autoclass, X, Y)
+    bestmodel = autoclass.model[:bestpipeline].model[:description]
+    MLF.log_param("bestmodel", bestmodel)
+    MLF.log_param("pipelines", autoclass.model[:dfpipelines].Description)
+    MLF.log_metric("bestperformance", autoclass.model[:performance].mean[1])
+    # save model in mlflow
+    artifact_name = mlfcl.model[:artifact_name]
+    # use temporary directory
+    tmpdir = tempdir()
+    artifact_location = joinpath(tmpdir, artifact_name)
+    serialize(artifact_location, autoclass)
+    MLF.log_artifact(artifact_location)
+    # save model in memory
+    mlfcl.model[:autoclass] = autoclass
+    bestmodel_uri = MLF.get_artifact_uri(artifact_path=artifact_name)
+    # save model uri location
+    mlfcl.model[:bestmodel_uri] = bestmodel_uri
+    MLF.end_run()
+end
+
+function fit(mlfcl::AutoMLFlowClassification, X::DataFrame, Y::Vector)
+    mlfcopy = deepcopy(mlfcl)
+    fit!(mlfcopy, X, Y)
+    return mlfcopy
+end
+
+function transform!(mlfcl::AutoMLFlowClassification, X::DataFrame)
+    MLF.end_run()
+    # download model artifact
+    run_id = mlfcl.model[:run_id]
+    artifact_name = mlfcl.model[:artifact_name]
+
+    try
+        model_artifacts = MLF.artifacts.list_artifacts(run_id=run_id)
+        @assert model_artifacts[0].path |> string == "autoclass.bin"
+    catch e
+        @info e
+        throw("Artifact $artifact_name does not exist in run_id = $run_id")
+    end
+
+    run_name = mlfcl.model[:name] * "_" * "transform" * "_" * randstring(3)
+    mlfcl.model[:run_name] = run_name
+    MLF.set_experiment(mlfcl.model[:name])
+    MLF.start_run(run_name=run_name)
+    pylocalpath = MLF.artifacts.download_artifacts(run_id=run_id, artifact_path=artifact_name)
+    bestmodel = deserialize(string(pylocalpath))
+    Y = transform!(bestmodel, X)
+    MLF.log_param("output", Y)
+    MLF.end_run()
+    return Y
+end
+
+function mlfcldriver()
+    df = getiris()
+    X = df[:, 1:end-1]
+    Y = df[:, end] |> collect
+
+    mlfclass = AutoMLFlowClassification()
+    Yc = fit_transform!(mlfclass, X, Y)
+    println("accuracy = ", mean(Y .== Yc))
+
+    # test prediction using existing trained model from artifacts
+    run_id = mlfclass.model[:run_id]
+    newmfclass = AutoMLFlowClassification(Dict(:run_id => run_id))
+    newmfclass = AutoMLFlowClassification()
+    newmfclass(; run_id=run_id)
+    Yn = transform!(newmfclass, X)
+    println("accuracy = ", mean(Yn .== Y))
+
+    return nothing
+end
+
+end
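
A short usage sketch outside mlfcldriver(), assuming an MLflow tracking server is already running at the default address (for example started with: mlflow server --host 127.0.0.1 --port 8080), that fit_transform! and transform! are available from AutoMLPipeline as they are inside the module, and that the type is reached through the AutoAI.AutoMLFlowClassifications submodule; the toy dataset, experiment name, and labels are illustrative.

using AutoAI, AutoMLPipeline, DataFrames
import AutoAI.AutoMLFlowClassifications: AutoMLFlowClassification

# toy two-class data; any feature DataFrame plus a label vector works
X = DataFrame(x1=randn(100), x2=randn(100))
Y = map(v -> v > 0 ? "pos" : "neg", X.x1 .+ X.x2)

# train: logs the bestmodel/pipelines params, the bestperformance metric,
# and the serialized pipeline as the autoclass.bin artifact
mlfclass = AutoMLFlowClassification(Dict(:name => "demo_classification",
                                         :url => "http://localhost:8080"))
Yhat = fit_transform!(mlfclass, X, Y)

# later: rebuild a workflow pointing at the same experiment and run, and predict
# from the pipeline stored in the MLflow artifact store
run_id = mlfclass.model[:run_id]
reloaded = AutoMLFlowClassification(Dict(:name => "demo_classification", :run_id => run_id))
Ynew = transform!(reloaded, X)

As in mlfcldriver(), the run_id is all that is needed to reuse a trained workflow: transform! downloads the autoclass.bin artifact for that run, deserializes the pipeline, and logs the predictions to a fresh run.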
