Partially observable inventory management
using SDDP, HiGHS, Random, Statistics, Test
function inventory_management_problem()
demand_values = [1.0, 2.0]
demand_prob = Dict(:Ah => [0.2, 0.8], :Bh => [0.8, 0.2])
graph = SDDP.Graph(
:root_node,
[:Ad, :Ah, :Bd, :Bh],
[
(:root_node => :Ad, 0.5),
(:root_node => :Bd, 0.5),
(:Ad => :Ah, 1.0),
(:Ah => :Ad, 0.8),
(:Ah => :Bd, 0.1),
(:Bd => :Bh, 1.0),
(:Bh => :Bd, 0.8),
(:Bh => :Ad, 0.1),
],
)
SDDP.add_ambiguity_set(graph, [:Ad, :Bd], 1e2)
SDDP.add_ambiguity_set(graph, [:Ah, :Bh], 1e2)
model = SDDP.PolicyGraph(
graph,
lower_bound = 0.0,
optimizer = HiGHS.Optimizer,
) do subproblem, node
@variables(
subproblem,
begin
0 <= inventory <= 2, (SDDP.State, initial_value = 0.0)
buy >= 0
demand
end
)
@constraint(subproblem, demand == inventory.in - inventory.out + buy)
if node == :Ad || node == :Bd || node == :D
JuMP.fix(demand, 0)
@stageobjective(subproblem, buy)
else
SDDP.parameterize(subproblem, demand_values, demand_prob[node]) do ω
return JuMP.fix(demand, ω)
end
@stageobjective(subproblem, 2 * buy + inventory.out)
end
end
# Train the policy.
Random.seed!(123)
SDDP.train(
model;
iteration_limit = 100,
cut_type = SDDP.SINGLE_CUT,
log_frequency = 10,
)
results = SDDP.simulate(model, 500)
objectives =
[sum(s[:stage_objective] for s in simulation) for simulation in results]
sample_mean = round(Statistics.mean(objectives); digits = 2)
sample_ci = round(1.96 * Statistics.std(objectives) / sqrt(500); digits = 2)
@test SDDP.calculate_bound(model) ≈ sample_mean atol = sample_ci
return
end
inventory_management_problem()------------------------------------------------------------------------------
SDDP.jl (c) Oscar Dowson and SDDP.jl contributors, 2017-23
Problem
Nodes : 4
State variables : 1
Scenarios : Inf
Existing cuts : false
Subproblem structure : (min, max)
Variables : (7, 7)
VariableRef in MOI.EqualTo{Float64} : (1, 1)
VariableRef in MOI.LessThan{Float64} : (3, 3)
AffExpr in MOI.GreaterThan{Float64} : (2, 2)
AffExpr in MOI.EqualTo{Float64} : (1, 1)
VariableRef in MOI.GreaterThan{Float64} : (5, 5)
Options
Solver : serial mode
Risk measure : SDDP.Expectation()
Sampling scheme : SDDP.InSampleMonteCarlo
Numerical stability report
Non-zero Matrix range [1e+00, 1e+00]
Non-zero Objective range [1e+00, 2e+00]
Non-zero Bounds range [2e+00, 1e+02]
Non-zero RHS range [0e+00, 0e+00]
No problems detected
Iteration Simulation Bound Time (s) Proc. ID # Solves
10 4.787277e+00 9.346930e+00 1.925600e-01 1 900
20 6.374753e+00 1.361934e+01 3.971820e-01 1 1720
30 2.813321e+01 1.651297e+01 8.020878e-01 1 3036
40 1.654759e+01 1.632970e+01 1.235394e+00 1 4192
50 3.570941e+00 1.846889e+01 1.571749e+00 1 5020
60 1.087425e+01 1.890254e+01 1.988171e+00 1 5808
70 9.381610e+00 1.940320e+01 2.375469e+00 1 6540
80 5.648731e+01 1.962435e+01 2.665044e+00 1 7088
90 3.879273e+01 1.981008e+01 3.356170e+00 1 8180
100 7.870187e+00 1.997117e+01 3.655151e+00 1 8664
Terminating training
Status : iteration_limit
Total time (s) : 3.655151e+00
Total solves : 8664
Best bound : 1.997117e+01
Simulation CI : 2.275399e+01 ± 4.541987e+00
------------------------------------------------------------------------------This page was generated using Literate.jl.