Merge pull request #12 from avik-pal/ap/updatesd
Updates
avik-pal authored Feb 25, 2022
2 parents a5de84c + 2a7d8ef commit 379dc8f
Showing 5 changed files with 40 additions and 31 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/docs.yml
```diff
@@ -6,6 +6,8 @@ on:
       - main
     tags: '*'
   pull_request:
+    branches:
+      - main

 jobs:
   build:
@@ -18,7 +20,7 @@ jobs:
       - name: Install dependencies
         run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
       - name: Build and deploy
+        run: julia --project=docs/ docs/make.jl
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token
+          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key
-        run: julia --project=docs/ docs/make.jl
-          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key
```
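For readers skimming the diff, the deploy step after this change would read as follows. This is a reconstruction from the hunk above, not verbatim file contents: the indentation is an assumption, since the page scrape flattened it.

```yaml
- name: Build and deploy
  run: julia --project=docs/ docs/make.jl
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token
    DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key
```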
2 changes: 1 addition & 1 deletion Project.toml
```diff
@@ -1,7 +1,7 @@
 name = "Wandb"
 uuid = "ad70616a-06c9-5745-b1f1-6a5f42545108"
 authors = ["Avik Pal <avikpal@mit.edu> and contributors"]
-version = "0.3.5"
+version = "0.4.0"

 [deps]
 Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
```
15 changes: 15 additions & 0 deletions README.md
````diff
@@ -4,3 +4,18 @@
 [![Stable Docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://avik-pal.github.io/Wandb.jl/stable/)

 Unofficial Julia Bindings for [wandb.ai](https://wandb.ai).
+
+## Installation
+
+To install, run the following in a Julia REPL:
+
+```julia
+] add Wandb
+```
+
+## Changelog
+
+### v0.4.0
+
+* `Base.log` is no longer exported. Users need to call `Wandb.log` (https://github.com/avik-pal/Wandb.jl/issues/9)
+* `FluxMPI` + `Wandb` integration demo updated to the latest API
````
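The first changelog entry above implies a one-line migration for existing scripts; a minimal sketch (the project name, run name, and metric values are illustrative, and logging assumes a configured wandb.ai account):

```julia
using Wandb

# Start a run; project/name values here are illustrative.
lg = WandbLogger(project = "Wandb.jl", name = "migration-demo")

# v0.3.x extended and exported `Base.log`, so a bare `log(lg, ...)` worked.
# From v0.4.0 the function is no longer exported, so qualify the call:
Wandb.log(lg, Dict("loss" => 0.42))

close(lg)
```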
44 changes: 19 additions & 25 deletions docs/src/examples/mpi.md
````diff
@@ -3,50 +3,44 @@
 For this example we will use the [FluxMPI.jl](https://github.com/avik-pal/FluxMPI.jl) package, which adds multi-GPU/multi-node training support for Flux.jl using MPI.jl.

 ```julia
-using Flux, FluxMPI, MPI, Zygote, CUDA, Wandb, Dates
+using Flux, FluxMPI, CUDA, Dates, Wandb, Zygote

-MPI.Init()
+# Step 1: Initialize FluxMPI. Not doing this will segfault your code
+FluxMPI.Init()
 CUDA.allowscalar(false)

 lg = WandbLoggerMPI(project = "Wandb.jl",
                     name = "mpijl-demo-$(now())")

-total_gpus = length(CUDA.devices())
-comm = MPI.COMM_WORLD
-rank = MPI.Comm_rank(comm)
-size = MPI.Comm_size(comm)
+# Step 2: Sync Model Parameters
+model = Chain(Dense(1, 2, tanh), Dense(2, 1)) |> gpu
+ps = Flux.params(model)
+broadcast_parameters(model; root_rank = 0)

-model = Chain(Dense(1, 2, tanh), Dense(2, 1))
-
-model_dp = DataParallelFluxModel(
-    model,
-    [i % total_gpus for i = 1:MPI.Comm_size(MPI.COMM_WORLD)],
-)
-
-ps = Flux.params(model_dp)
-
-x = rand(1, 64) |> gpu
+# It is the user's responsibility to partition the data across the processes
+# In this case, we are training on a total of 16 * <np> samples
+x = rand(1, 16) |> gpu
 y = x .^ 2
+dataloader = Flux.DataLoader((x, y), batchsize = 16)

-dataloader = DataParallelDataLoader((x, y), batchsize = 16)
+# Step 3: Wrap the optimizer in DistributedOptimiser
+# Scale the learning rate by the number of workers (`total_workers()`).
+opt = DistributedOptimiser(Flux.ADAM(0.001))

 function loss(x_, y_)
-    loss = sum(abs2, model_dp(x_) .- y_)
-    Zygote.@ignore log(lg, Dict("loss" => loss))
-    return loss
+    l = sum(abs2, model(x_) .- y_)
+    Zygote.@ignore Wandb.log(lg, Dict("loss" => l))
+    return l
 end

 for epoch in 1:100
-    if rank == 0
-        @info "epoch = $epoch"
-    end
-    Flux.Optimise.train!(loss, ps, dataloader, Flux.ADAM(0.001))
+    Flux.Optimise.train!(loss, ps, dataloader, opt)
 end
 ```

 The main points when using MPI and Wandb are:

-1. `MPI.Init()` must be called before `WandbLoggerMPI` is called.
+1. `FluxMPI.Init()` must be called before `WandbLoggerMPI` is called.
 2. The `config` cannot be updated after `WandbLoggerMPI` is initialized, i.e. `update_config!` won't work.
 3. Logging is done in the following manner:
    1. If the `group` kwarg is not passed or is `nothing`: all the logging is done by the process with `rank = 0`.
````
4 changes: 1 addition & 3 deletions src/main.jl
```diff
@@ -35,9 +35,7 @@ end
 increment_step!(lg::WandbLogger, Δ_Step) = lg.global_step += Δ_Step

 # https://docs.wandb.ai/guides/track/log
-# Probably shouldn't do this but want to stay consistent with the
-# Wandb API
-Base.log(lg::WandbLogger, logs::Dict; kwargs...) = lg.wrun.log(logs; kwargs...)
+log(lg::WandbLogger, logs::Dict; kwargs...) = lg.wrun.log(logs; kwargs...)

 # https://docs.wandb.ai/guides/track/config
 update_config!(lg::WandbLogger, dict::Dict; kwargs...) = lg.wrun.config.update(dict; kwargs...)
```

2 comments on commit 379dc8f

@avik-pal (Owner, Author)
@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/55460

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

```shell
git tag -a v0.4.0 -m "<description of version>" 379dc8f7b6164c9e0675fb35f031ac68011c2218
git push origin v0.4.0
```
