Partially observable inventory management
This tutorial was generated using Literate.jl. Download the source as a .jl
file. Download the source as a .ipynb
file.
using SDDP, HiGHS, Random, Statistics, Test
function inventory_management_problem()
demand_values = [1.0, 2.0]
demand_prob = Dict(:Ah => [0.2, 0.8], :Bh => [0.8, 0.2])
graph = SDDP.Graph(
:root_node,
[:Ad, :Ah, :Bd, :Bh],
[
(:root_node => :Ad, 0.5),
(:root_node => :Bd, 0.5),
(:Ad => :Ah, 1.0),
(:Ah => :Ad, 0.8),
(:Ah => :Bd, 0.1),
(:Bd => :Bh, 1.0),
(:Bh => :Bd, 0.8),
(:Bh => :Ad, 0.1),
],
)
SDDP.add_ambiguity_set(graph, [:Ad, :Bd], 1e2)
SDDP.add_ambiguity_set(graph, [:Ah, :Bh], 1e2)
model = SDDP.PolicyGraph(
graph;
lower_bound = 0.0,
optimizer = HiGHS.Optimizer,
) do subproblem, node
@variables(
subproblem,
begin
0 <= inventory <= 2, (SDDP.State, initial_value = 0.0)
buy >= 0
demand
end
)
@constraint(subproblem, demand == inventory.in - inventory.out + buy)
if node == :Ad || node == :Bd || node == :D
JuMP.fix(demand, 0)
@stageobjective(subproblem, buy)
else
SDDP.parameterize(subproblem, demand_values, demand_prob[node]) do ω
return JuMP.fix(demand, ω)
end
@stageobjective(subproblem, 2 * buy + inventory.out)
end
end
# Train the policy.
Random.seed!(123)
SDDP.train(
model;
iteration_limit = 100,
cut_type = SDDP.SINGLE_CUT,
log_frequency = 10,
parallel_scheme = SDDP.Serial(),
)
results = SDDP.simulate(model, 500; parallel_scheme = SDDP.Serial())
objectives =
[sum(s[:stage_objective] for s in simulation) for simulation in results]
sample_mean = round(Statistics.mean(objectives); digits = 2)
sample_ci = round(1.96 * Statistics.std(objectives) / sqrt(500); digits = 2)
@test SDDP.calculate_bound(model) ≈ sample_mean atol = sample_ci
return
end
inventory_management_problem()
-------------------------------------------------------------------
SDDP.jl (c) Oscar Dowson and contributors, 2017-24
-------------------------------------------------------------------
problem
nodes : 4
state variables : 1
scenarios : Inf
existing cuts : false
options
solver : serial mode
risk measure : SDDP.Expectation()
sampling scheme : SDDP.InSampleMonteCarlo
subproblem structure
VariableRef : [7, 7]
AffExpr in MOI.EqualTo{Float64} : [1, 1]
AffExpr in MOI.GreaterThan{Float64} : [2, 2]
VariableRef in MOI.EqualTo{Float64} : [1, 1]
VariableRef in MOI.GreaterThan{Float64} : [5, 5]
VariableRef in MOI.LessThan{Float64} : [3, 3]
numerical stability report
matrix range [1e+00, 1e+00]
objective range [1e+00, 2e+00]
bounds range [2e+00, 1e+02]
rhs range [0e+00, 0e+00]
-------------------------------------------------------------------
iteration simulation bound time (s) solves pid
-------------------------------------------------------------------
10 4.787277e+00 9.346930e+00 1.356088e+00 900 1
20 6.374753e+00 1.361934e+01 1.524785e+00 1720 1
30 2.813321e+01 1.651297e+01 1.882141e+00 3036 1
40 1.654759e+01 1.632970e+01 2.241515e+00 4192 1
50 3.570941e+00 1.846889e+01 2.494816e+00 5020 1
60 1.087425e+01 1.890254e+01 2.785500e+00 5808 1
70 9.381610e+00 1.940320e+01 3.079734e+00 6540 1
80 5.648731e+01 1.962435e+01 3.302529e+00 7088 1
90 3.879273e+01 1.981008e+01 3.788458e+00 8180 1
100 7.870187e+00 1.997117e+01 4.007397e+00 8664 1
-------------------------------------------------------------------
status : iteration_limit
total time (s) : 4.007397e+00
total solves : 8664
best bound : 1.997117e+01
simulation ci : 2.275399e+01 ± 4.541987e+00
numeric issues : 0
-------------------------------------------------------------------