Stochastic All Blacks

using SDDP, HiGHS, Test

function stochastic_all_blacks()
    # Number of time periods
    T = 3
    # Number of seats
    N = 2
    # R_ij = price of seat i at time j
    R = [3 3 6; 3 3 6]
    # Number of noises
    s = 3
    # offers[stage][ω][i] = 1 if seat i receives an offer in stage `stage`
    # under noise realization ω, and 0 otherwise
    offers = [
        [[1, 1], [0, 0], [1, 1]],
        [[1, 0], [0, 0], [0, 0]],
        [[0, 1], [1, 0], [1, 1]],
    ]
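    # With three stages and three noise realizations per stage, there are
    # 3^3 = 27 possible scenarios, matching the `Scenarios` line in the log
    # below.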

    model = SDDP.LinearPolicyGraph(
        stages = T,
        sense = :Max,
        upper_bound = 100.0,
        optimizer = HiGHS.Optimizer,
    ) do sp, stage
        # State: is seat i still available for sale?
        @variable(sp, 0 <= x[1:N] <= 1, SDDP.State, Bin, initial_value = 1)
        # Action: accept or reject each offer.
        # We may accept an offer for some seats and decline it for others.
        @variable(sp, accept_offer[1:N], Bin)
        # offers_made[i] is fixed to the realized offer by SDDP.parameterize
        @variable(sp, offers_made[1:N])
        # Balance on seats
        @constraint(
            sp,
            balance[i in 1:N],
            x[i].in - x[i].out == accept_offer[i]
        )
        @stageobjective(sp, sum(R[i, stage] * accept_offer[i] for i in 1:N))
        # The noise term determines which seats receive an offer in this stage
        SDDP.parameterize(sp, offers[stage]) do o
            return JuMP.fix.(offers_made, o)
        end
        # A seat can be accepted only if an offer was made for it
        @constraint(sp, accept_offer .<= offers_made)
    end

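    # The state and control variables are binary, so each subproblem is a
    # mixed-integer program. SDDP.LagrangianDuality computes cuts from the
    # Lagrangian dual of the subproblem instead of an LP relaxation, so the
    # cuts remain valid in the presence of integer variables.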
    SDDP.train(
        model,
        iteration_limit = 10,
        duality_handler = SDDP.LagrangianDuality(),
    )
    @test SDDP.calculate_bound(model) ≈ 8.0
    return
end

stochastic_all_blacks()
------------------------------------------------------------------------------
          SDDP.jl (c) Oscar Dowson and SDDP.jl contributors, 2017-23

Problem
  Nodes           : 3
  State variables : 2
  Scenarios       : 2.70000e+01
  Existing cuts   : false
  Subproblem structure                      : (min, max)
    Variables                               : (9, 9)
    VariableRef in MOI.LessThan{Float64}    : (3, 3)
    VariableRef in MOI.ZeroOne              : (4, 4)
    AffExpr in MOI.LessThan{Float64}        : (2, 2)
    VariableRef in MOI.GreaterThan{Float64} : (2, 3)
    AffExpr in MOI.EqualTo{Float64}         : (2, 2)
Options
  Solver          : serial mode
  Risk measure    : SDDP.Expectation()
  Sampling scheme : SDDP.InSampleMonteCarlo

Numerical stability report
  Non-zero Matrix range     [1e+00, 1e+00]
  Non-zero Objective range  [1e+00, 6e+00]
  Non-zero Bounds range     [1e+00, 1e+02]
  Non-zero RHS range        [0e+00, 0e+00]
No problems detected

 Iteration    Simulation       Bound         Time (s)    Proc. ID   # Solves
        1L   6.000000e+00   1.414815e+01   5.578589e-02          1         11
        2L   6.000000e+00   1.414815e+01   6.192493e-02          1         22
        3L   6.000000e+00   9.111111e+00   6.956291e-02          1         33
        4L   1.200000e+01   8.777778e+00   8.047891e-02          1         44
        5L   9.000000e+00   8.777778e+00   9.310293e-02          1         55
        6L   6.000000e+00   8.777778e+00   1.050189e-01          1         66
        7L   3.000000e+00   8.333333e+00   1.150780e-01          1         77
        8L   1.200000e+01   8.000000e+00   1.247280e-01          1         88
        9L   6.000000e+00   8.000000e+00   1.349599e-01          1         99
       10L   6.000000e+00   8.000000e+00   1.446669e-01          1        110

Terminating training
  Status         : iteration_limit
  Total time (s) : 1.446669e-01
  Total solves   : 110
  Best bound     :  8.000000e+00
  Simulation CI  :  7.200000e+00 ± 1.796370e+00
------------------------------------------------------------------------------
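
As an optional extension, the trained policy can be evaluated by Monte Carlo
simulation. The sketch below is not part of the original example: it assumes
`stochastic_all_blacks` is modified to end with `return model` so that the
trained policy graph is accessible, and it uses `SDDP.simulate` to record the
accepted offers and stage objectives along 100 sampled scenarios.

model = stochastic_all_blacks()  # assumes the function now returns `model`
simulations = SDDP.simulate(model, 100, [:x, :accept_offer])
# Total revenue earned in each sampled scenario
revenues = map(simulations) do simulation
    return sum(stage[:stage_objective] for stage in simulation)
end
println("Average simulated revenue: ", sum(revenues) / length(revenues))

Because the policy is risk neutral, the average simulated revenue should be
close to, and in expectation no greater than, the bound of 8.0 reported above.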

This page was generated using Literate.jl.