Partially observable inventory management

This tutorial was generated using Literate.jl. Download the source as a .jl file. Download the source as a .ipynb file.

using SDDP, HiGHS, Random, Statistics, Test

function inventory_management_problem()
    demand_values = [1.0, 2.0]
    demand_prob = Dict(:Ah => [0.2, 0.8], :Bh => [0.8, 0.2])
    graph = SDDP.Graph(
        :root_node,
        [:Ad, :Ah, :Bd, :Bh],
        [
            (:root_node => :Ad, 0.5),
            (:root_node => :Bd, 0.5),
            (:Ad => :Ah, 1.0),
            (:Ah => :Ad, 0.8),
            (:Ah => :Bd, 0.1),
            (:Bd => :Bh, 1.0),
            (:Bh => :Bd, 0.8),
            (:Bh => :Ad, 0.1),
        ],
    )
    SDDP.add_ambiguity_set(graph, [:Ad, :Bd], 1e2)
    SDDP.add_ambiguity_set(graph, [:Ah, :Bh], 1e2)

    model = SDDP.PolicyGraph(
        graph;
        lower_bound = 0.0,
        optimizer = HiGHS.Optimizer,
    ) do subproblem, node
        @variables(
            subproblem,
            begin
                0 <= inventory <= 2, (SDDP.State, initial_value = 0.0)
                buy >= 0
                demand
            end
        )
        @constraint(subproblem, demand == inventory.in - inventory.out + buy)
        if node == :Ad || node == :Bd || node == :D
            JuMP.fix(demand, 0)
            @stageobjective(subproblem, buy)
        else
            SDDP.parameterize(subproblem, demand_values, demand_prob[node]) do ω
                return JuMP.fix(demand, ω)
            end
            @stageobjective(subproblem, 2 * buy + inventory.out)
        end
    end
    # Train the policy.
    Random.seed!(123)
    SDDP.train(
        model;
        iteration_limit = 100,
        cut_type = SDDP.SINGLE_CUT,
        log_frequency = 10,
        parallel_scheme = SDDP.Serial(),
    )
    results = SDDP.simulate(model, 500; parallel_scheme = SDDP.Serial())
    objectives =
        [sum(s[:stage_objective] for s in simulation) for simulation in results]
    sample_mean = round(Statistics.mean(objectives); digits = 2)
    sample_ci = round(1.96 * Statistics.std(objectives) / sqrt(500); digits = 2)
    @test SDDP.calculate_bound(model) ≈ sample_mean atol = sample_ci
    return
end

inventory_management_problem()
-------------------------------------------------------------------
         SDDP.jl (c) Oscar Dowson and contributors, 2017-24
-------------------------------------------------------------------
problem
  nodes           : 4
  state variables : 1
  scenarios       : Inf
  existing cuts   : false
options
  solver          : serial mode
  risk measure    : SDDP.Expectation()
  sampling scheme : SDDP.InSampleMonteCarlo
subproblem structure
  VariableRef                             : [7, 7]
  AffExpr in MOI.EqualTo{Float64}         : [1, 1]
  AffExpr in MOI.GreaterThan{Float64}     : [2, 2]
  VariableRef in MOI.EqualTo{Float64}     : [1, 1]
  VariableRef in MOI.GreaterThan{Float64} : [5, 5]
  VariableRef in MOI.LessThan{Float64}    : [3, 3]
numerical stability report
  matrix range     [1e+00, 1e+00]
  objective range  [1e+00, 2e+00]
  bounds range     [2e+00, 1e+02]
  rhs range        [0e+00, 0e+00]
-------------------------------------------------------------------
 iteration    simulation      bound        time (s)     solves  pid
-------------------------------------------------------------------
        10   4.787277e+00  9.346930e+00  1.456003e+00       900   1
        20   6.374753e+00  1.361934e+01  1.630589e+00      1720   1
        30   2.813321e+01  1.651297e+01  2.081573e+00      3036   1
        40   1.654759e+01  1.632970e+01  3.057816e+00      4192   1
        50   3.570941e+00  1.846889e+01  3.372474e+00      5020   1
        60   1.087425e+01  1.890254e+01  3.719990e+00      5808   1
        70   9.381610e+00  1.940320e+01  4.129348e+00      6540   1
        80   5.648731e+01  1.962435e+01  4.457603e+00      7088   1
        90   3.879273e+01  1.981008e+01  5.068689e+00      8180   1
       100   7.870187e+00  1.997117e+01  5.357475e+00      8664   1
-------------------------------------------------------------------
status         : iteration_limit
total time (s) : 5.357475e+00
total solves   : 8664
best bound     :  1.997117e+01
simulation ci  :  2.275399e+01 ± 4.541987e+00
numeric issues : 0
-------------------------------------------------------------------