Partially observable inventory management

using SDDP, HiGHS, Random, Statistics, Test

function inventory_management_problem()
    demand_values = [1.0, 2.0]
    demand_prob = Dict(:Ah => [0.2, 0.8], :Bh => [0.8, 0.2])
    graph = SDDP.Graph(
        :root_node,
        [:Ad, :Ah, :Bd, :Bh],
        [
            (:root_node => :Ad, 0.5),
            (:root_node => :Bd, 0.5),
            (:Ad => :Ah, 1.0),
            (:Ah => :Ad, 0.8),
            (:Ah => :Bd, 0.1),
            (:Bd => :Bh, 1.0),
            (:Bh => :Bd, 0.8),
            (:Bh => :Ad, 0.1),
        ],
    )
    SDDP.add_ambiguity_set(graph, [:Ad, :Bd], 1e2)
    SDDP.add_ambiguity_set(graph, [:Ah, :Bh], 1e2)

    model = SDDP.PolicyGraph(
        graph,
        lower_bound = 0.0,
        optimizer = HiGHS.Optimizer,
    ) do subproblem, node
        @variables(
            subproblem,
            begin
                0 <= inventory <= 2, (SDDP.State, initial_value = 0.0)
                buy >= 0
                demand
            end
        )
        @constraint(subproblem, demand == inventory.in - inventory.out + buy)
        if node == :Ad || node == :Bd || node == :D
            JuMP.fix(demand, 0)
            @stageobjective(subproblem, buy)
        else
            SDDP.parameterize(subproblem, demand_values, demand_prob[node]) do ω
                return JuMP.fix(demand, ω)
            end
            @stageobjective(subproblem, 2 * buy + inventory.out)
        end
    end
    # Train the policy.
    Random.seed!(123)
    SDDP.train(
        model;
        iteration_limit = 100,
        cut_type = SDDP.SINGLE_CUT,
        log_frequency = 10,
    )
    results = SDDP.simulate(model, 500)
    objectives =
        [sum(s[:stage_objective] for s in simulation) for simulation in results]
    sample_mean = round(Statistics.mean(objectives); digits = 2)
    sample_ci = round(1.96 * Statistics.std(objectives) / sqrt(500); digits = 2)
    @test SDDP.calculate_bound(model) ≈ sample_mean atol = sample_ci
    return
end

inventory_management_problem()

------------------------------------------------------------------------------
          SDDP.jl (c) Oscar Dowson and SDDP.jl contributors, 2017-23

Problem
  Nodes           : 4
  State variables : 1
  Scenarios       : Inf
  Existing cuts   : false
  Subproblem structure                      : (min, max)
    Variables                               : (7, 7)
    VariableRef in MOI.EqualTo{Float64}     : (1, 1)
    VariableRef in MOI.LessThan{Float64}    : (3, 3)
    AffExpr in MOI.GreaterThan{Float64}     : (2, 2)
    AffExpr in MOI.EqualTo{Float64}         : (1, 1)
    VariableRef in MOI.GreaterThan{Float64} : (5, 5)
Options
  Solver          : serial mode
  Risk measure    : SDDP.Expectation()
  Sampling scheme : SDDP.InSampleMonteCarlo

Numerical stability report
  Non-zero Matrix range     [1e+00, 1e+00]
  Non-zero Objective range  [1e+00, 2e+00]
  Non-zero Bounds range     [2e+00, 1e+02]
  Non-zero RHS range        [0e+00, 0e+00]
No problems detected

 Iteration    Simulation       Bound         Time (s)    Proc. ID   # Solves
       10    4.787277e+00   9.346930e+00   1.925600e-01          1        900
       20    6.374753e+00   1.361934e+01   3.971820e-01          1       1720
       30    2.813321e+01   1.651297e+01   8.020878e-01          1       3036
       40    1.654759e+01   1.632970e+01   1.235394e+00          1       4192
       50    3.570941e+00   1.846889e+01   1.571749e+00          1       5020
       60    1.087425e+01   1.890254e+01   1.988171e+00          1       5808
       70    9.381610e+00   1.940320e+01   2.375469e+00          1       6540
       80    5.648731e+01   1.962435e+01   2.665044e+00          1       7088
       90    3.879273e+01   1.981008e+01   3.356170e+00          1       8180
      100    7.870187e+00   1.997117e+01   3.655151e+00          1       8664

Terminating training
  Status         : iteration_limit
  Total time (s) : 3.655151e+00
  Total solves   : 8664
  Best bound     :  1.997117e+01
  Simulation CI  :  2.275399e+01 ± 4.541987e+00
------------------------------------------------------------------------------

This page was generated using Literate.jl.