JuliaDecisionFocusedLearning · BatyLeo · Oct 8, 2024 · May 13, 2024 · May 14, 2024 · May 14, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,7 @@
 .vscode
 /docs/src/index.md
+data
+scripts
 
 # Files generated by invoking Julia with --code-coverage
 *.jl.cov

diff --git a/Project.toml b/Project.toml
@@ -4,17 +4,39 @@ authors = ["Members of JuliaDecisionFocusedLearning"]
 version = "0.1.0"
 
 [deps]
+DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
+HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
+Images = "916415d5-f1e6-5110-898d-aaa5f9f070e0"
 InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f"
+Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
+JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
+NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SimpleWeightedGraphs = "47aef6b3-ad0c-573a-a1e2-d07658019622"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [compat]
+DataDeps = "0.7"
+Distributions = "0.25"
+DocStringExtensions = "0.9"
+Flux = "0.14"
 Graphs = "1.11"
+HiGHS = "1.9"
+Images = "0.26"
 InferOpt = "0.6"
+Ipopt = "1.6"
+JuMP = "1.22"
 LinearAlgebra = "1"
+Metalhead = "0.9"
+NPZ = "0.4"
+Plots = "1"
 Random = "1"
 SimpleWeightedGraphs = "1.4"
 SparseArrays = "1"
@@ -23,25 +45,18 @@ julia = "1.6"
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
 JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
+ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
+UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = [
-    "Aqua",
-    "Documenter",
-    "Graphs",
-    "JET",
-    "JuliaFormatter",
-    "Random",
-    "StableRNGs",
-    "Statistics",
-    "Test",
-    "Zygote",
-]
+test = ["Aqua", "Documenter", "Flux", "Graphs", "JET", "JuliaFormatter", "Random", "ProgressMeter", "StableRNGs", "Statistics", "Test", "TestItemRunner", "UnicodePlots", "Zygote"]
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -1,3 +1,7 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+InferOpt = "4846b161-c94e-4150-8dac-c7ae193c601f"
 InferOptBenchmarks = "2fbe496a-299b-4c81-bab5-c44dfc55cf20"
+Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,21 +1,37 @@
 using Documenter
 using InferOptBenchmarks
+using Literate
 
 cp(joinpath(@__DIR__, "..", "README.md"), joinpath(@__DIR__, "src", "index.md"); force=true)
 
+md_dir = joinpath(@__DIR__, "src")
+tutorial_dir = joinpath(@__DIR__, "src", "tutorials")
+tutorial_files = readdir(tutorial_dir)
+md_tutorial_files = [split(file, ".")[1] * ".md" for file in tutorial_files]
+
+for file in tutorial_files
+    filepath = joinpath(tutorial_dir, file)
+    Literate.markdown(filepath, md_dir; documenter=true, execute=false)
+end
+
 makedocs(;
     modules=[InferOptBenchmarks, InferOptBenchmarks.Warcraft],
     authors="Members of JuliaDecisionFocusedLearning",
     sitename="InferOptBenchmarks.jl",
     format=Documenter.HTML(),
     pages=[
-        "Home" => "index.md",  #
-        "API reference" => [
-            "warcraft.md",  #
-        ],
+        "Home" => "index.md",
+        "Tutorials" => md_tutorial_files,
+        "API reference" =>
+            ["api/interface.md", "api/decision_focused.md", "api/warcraft.md"],
     ],
 )
 
+for file in md_tutorial_files
+    filepath = joinpath(md_dir, file)
+    rm(filepath)
+end
+
 deploydocs(;
     repo="github.com/JuliaDecisionFocusedLearning/InferOptBenchmarks.jl", devbranch="main"
 )
diff --git a/docs/src/api/decision_focused.md b/docs/src/api/decision_focused.md
@@ -0,0 +1,15 @@
+# Decision-focused learning paper
+
+## Public
+
+```@autodocs
+Modules = [InferOptBenchmarks.FixedSizeShortestPath, InferOptBenchmarks.PortfolioOptimization, InferOptBenchmarks.SubsetSelection]
+Private = false
+```
+
+## Private
+
+```@autodocs
+Modules = [InferOptBenchmarks.FixedSizeShortestPath, InferOptBenchmarks.PortfolioOptimization, InferOptBenchmarks.SubsetSelection]
+Public = false
+```
diff --git a/docs/src/api/interface.md b/docs/src/api/interface.md
@@ -0,0 +1,15 @@
+# Interface
+
+## Public
+
+```@autodocs
+Modules = [InferOptBenchmarks.Utils]
+Private = false
+```
+
+## Private
+
+```@autodocs
+Modules = [InferOptBenchmarks.Utils]
+Public = false
+```
diff --git a/docs/src/warcraft.md → docs/src/api/warcraft.md b/docs/src/warcraft.md → docs/src/api/warcraft.md
diff --git a/docs/src/tutorials/warcraft.jl b/docs/src/tutorials/warcraft.jl
@@ -0,0 +1,88 @@
+# # Path-finding on image maps
+
+#=
+In this tutorial, we showcase InferOptBenchmarks.jl capabilities on one of its main benchmarks: the Warcraft benchmark.
+This benchmark problem is a simple path-finding problem where the goal is to find the shortest path between the top left and bottom right corners of a given image map.
+The map is a 2D image depicting a 12x12 grid, each cell having an unknown travel cost that depends on the terrain type.
+=#
+
+# First, let's load the package and create a benchmark object as follows:
+using InferOptBenchmarks
+b = WarcraftBenchmark()
+
+# ## Dataset generation
+
+# These benchmark objects behave as generators: they can produce the various elements needed to build an algorithm that tackles the problem.
+# First of all, all benchmarks are capable of generating datasets as needed, using the [`generate_dataset`](@ref) method.
+# This method takes as input the benchmark object for which the dataset is to be generated, and a second argument specifying the number of samples to generate:
+dataset = generate_dataset(b, 50);
+
+# We obtain a vector of [`DataSample`](@ref) objects, containing all needed data for the problem.
+# Subdatasets can be created through regular slicing:
+train_dataset, test_dataset = dataset[1:45], dataset[46:50]
+
+# And getting an individual sample will return a [`DataSample`](@ref) with four fields: `x`, `instance`, `θ`, and `y`:
+sample = test_dataset[1]
+# `x` corresponds to the input features, i.e. the input image (3D array) in the Warcraft benchmark case:
+x = sample.x
+# `θ` corresponds to the true unknown terrain weights. We use the opposite of the true weights in order to formulate the optimization problem as a maximization problem:
+θ_true = sample.θ
+# `y` corresponds to the optimal shortest path, encoded as a binary matrix:
+y_true = sample.y
+# `instance` is not used in this benchmark, and is therefore set to `nothing`:
+isnothing(sample.instance)
+
+# For some benchmarks, we provide the following plotting method [`plot_data`](@ref) to visualize the data:
+plot_data(b, sample)
+# We can see here the terrain image, the true terrain weights, and the true shortest path avoiding the high cost cells.
+
+# ## Building a pipeline
+
+# InferOptBenchmarks also provides methods to build a hybrid machine learning and combinatorial optimization pipeline for the benchmark.
+# First, the [`generate_statistical_model`](@ref) method generates a machine learning predictor to predict cell weights from the input image:
+model = generate_statistical_model(b)
+# In the case of the Warcraft benchmark, the model is a convolutional neural network built using the Flux.jl package.
+θ = model(x)
+# Note that the model is not trained yet, and its parameters are randomly initialized.
+
+# Finally, the [`generate_maximizer`](@ref) method can be used to generate a combinatorial optimization algorithm that takes the predicted cell weights as input and returns the corresponding shortest path:
+maximizer = generate_maximizer(b; dijkstra=true)
+# In the case of the Warcraft benchmark, the method has an additional keyword argument to choose the algorithm to use: Dijkstra's algorithm or the Bellman-Ford algorithm.
+y = maximizer(θ)
+# As we can see, currently the pipeline predicts random noise as cell weights, and therefore the maximizer returns a straight line path.
+plot_data(b, DataSample(; x, θ, y))
+# We can evaluate the current pipeline performance using the optimality gap metric:
+starting_gap = compute_gap(b, test_dataset, model, maximizer)
+
+# ## Using a learning algorithm
+
+# We can now train the model using the InferOpt.jl package:
+using InferOpt
+using Flux
+using Plots
+
+perturbed_maximizer = PerturbedMultiplicative(maximizer; ε=0.2, nb_samples=100)
+loss = FenchelYoungLoss(perturbed_maximizer)
+
+starting_gap = compute_gap(b, test_dataset, model, maximizer)
+
+opt_state = Flux.setup(Adam(1e-3), model)
+loss_history = Float64[]
+for epoch in 1:50
+    val, grads = Flux.withgradient(model) do m
+        sum(loss(m(sample.x), sample.y) for sample in train_dataset) / length(train_dataset)
+    end
+    Flux.update!(opt_state, model, grads[1])
+    push!(loss_history, val)
+end
+
+plot(loss_history; xlabel="Epoch", ylabel="Loss", title="Training loss")
+
+#
+
+final_gap = compute_gap(b, test_dataset, model, maximizer)
+
+#
+θ = model(x)
+y = maximizer(θ)
+plot_data(b, DataSample(; x, θ, y))