Constructive Non-Autoregressive

DeepACO

Classes:

AntSystem

AntSystem(
    log_heuristic: Tensor,
    n_ants: int = 20,
    alpha: float = 1.0,
    beta: float = 1.0,
    decay: float = 0.95,
    Q: Optional[float] = None,
    pheromone: Optional[Tensor | int] = None,
    use_local_search: bool = False,
    use_nls: bool = False,
    n_perturbations: int = 1,
    local_search_params: dict = {},
    perturbation_params: dict = {},
)

Implements the Ant System algorithm: https://doi.org/10.1109/3477.484436.

Parameters:

  • log_heuristic (Tensor) –

    Logarithm of the heuristic matrix.

  • n_ants (int, default: 20 ) –

    Number of ants to be used in the algorithm. Defaults to 20.

  • alpha (float, default: 1.0 ) –

    Importance of pheromone in the decision-making process. Defaults to 1.0.

  • beta (float, default: 1.0 ) –

    Importance of heuristic information in the decision-making process. Defaults to 1.0.

  • decay (float, default: 0.95 ) –

    Rate at which pheromone evaporates. Should be between 0 and 1. Defaults to 0.95.

  • Q (Optional[float], default: None ) –

    Rate at which pheromone is deposited. Defaults to 1 / (n_ants * decay).

  • pheromone (Optional[Tensor | int], default: None ) –

    Initial pheromone matrix. Defaults to torch.ones_like(log_heuristic).

  • use_local_search (bool, default: False ) –

    Whether to use the local_search provided by the env. Defaults to False.

  • use_nls (bool, default: False ) –

    Whether to use neural-guided local search provided by the env. Defaults to False.

  • n_perturbations (int, default: 1 ) –

    Number of perturbations to be used for nls. Defaults to 1.

  • local_search_params (dict, default: {} ) –

    Arguments to be passed to the local_search.

  • perturbation_params (dict, default: {} ) –

    Arguments to be passed to the perturbation used for nls.

Methods:

  • run

    Run the Ant System algorithm for a specified number of iterations.

  • local_search

    Perform local search on the actions and reward obtained.

Source code in rl4co/models/zoo/deepaco/antsystem.py
def __init__(
    self,
    log_heuristic: Tensor,
    n_ants: int = 20,
    alpha: float = 1.0,
    beta: float = 1.0,
    decay: float = 0.95,
    Q: Optional[float] = None,
    pheromone: Optional[Tensor | int] = None,
    use_local_search: bool = False,
    use_nls: bool = False,
    n_perturbations: int = 1,
    local_search_params: dict = {},
    perturbation_params: dict = {},
):
    self.batch_size = log_heuristic.shape[0]
    self.n_ants = n_ants
    self.alpha = alpha
    self.beta = beta
    self.decay = decay
    self.Q = 1 / self.n_ants / self.decay if Q is None else Q

    self.log_heuristic = log_heuristic

    if pheromone is None or isinstance(pheromone, int):
        self.pheromone = torch.ones_like(log_heuristic)
        self.pheromone.fill_(pheromone if isinstance(pheromone, int) else 1)
    else:
        assert pheromone.shape == log_heuristic.shape
        self.pheromone = pheromone

    self.final_actions = self.final_reward = None
    self.final_reward_cache: dict = {}

    self.use_local_search = use_local_search
    assert not (use_nls and not use_local_search), "use_nls requires use_local_search"
    self.use_nls = use_nls
    self.n_perturbations = n_perturbations
    self.local_search_params = local_search_params.copy()  # needs to be copied to avoid side effects
    self.perturbation_params = perturbation_params.copy()

    self._batchindex = torch.arange(self.batch_size, device=log_heuristic.device)
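
Example (a hedged sketch with illustrative values; the import path follows the "Source code" location above):

import torch

from rl4co.models.zoo.deepaco.antsystem import AntSystem

# Random log-heuristic for a batch of 4 instances with 20 nodes each
log_heuristic = torch.rand(4, 20, 20).log()  # [batch, n_nodes, n_nodes]
aco = AntSystem(log_heuristic, n_ants=10, decay=0.9)

print(aco.pheromone.shape)  # same shape as log_heuristic, initialized to ones
print(aco.Q)                # defaults to 1 / (n_ants * decay) when Q is None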

run

run(
    td_initial: TensorDict,
    env: RL4COEnvBase,
    n_iterations: int,
    decoding_kwargs: dict,
    disable_tqdm: bool = True,
) -> Tuple[Tensor, dict[int, Tensor]]

Run the Ant System algorithm for a specified number of iterations.

Parameters:

  • td_initial (TensorDict) –

    Initial state of the problem.

  • env (RL4COEnvBase) –

    Environment representing the problem.

  • n_iterations (int) –

    Number of iterations to run the algorithm.

  • decoding_kwargs (dict) –

    Keyword arguments for decoding strategy.

  • disable_tqdm (bool, default: True ) –

    Whether to disable the tqdm progress bar. Defaults to True.

Returns:

  • actions ( Tensor ) –

    The final actions chosen by the algorithm, converted to an action matrix.

  • reward_cache ( dict[int, Tensor] ) –

    The reward achieved after each iteration, keyed by iteration index.

Source code in rl4co/models/zoo/deepaco/antsystem.py
def run(
    self,
    td_initial: TensorDict,
    env: RL4COEnvBase,
    n_iterations: int,
    decoding_kwargs: dict,
    disable_tqdm: bool = True,
) -> Tuple[Tensor, dict[int, Tensor]]:
    """Run the Ant System algorithm for a specified number of iterations.

    Args:
        td_initial: Initial state of the problem.
        env: Environment representing the problem.
        n_iterations: Number of iterations to run the algorithm.
        decoding_kwargs: Keyword arguments for decoding strategy.
        disable_tqdm: Whether to disable the tqdm progress bar. Defaults to True.

    Returns:
        actions: The final actions chosen by the algorithm, converted to an action matrix.
        reward_cache: The reward achieved after each iteration, keyed by iteration index.
    """
    pbar = trange(
        n_iterations, dynamic_ncols=True, desc="Running ACO", disable=disable_tqdm
    )
    for i in pbar:
        # reset environment
        td = td_initial.clone()
        self._one_step(td, env, decoding_kwargs)
        self.final_reward_cache[i] = self.final_reward.clone()  # type: ignore
        pbar.set_postfix({"reward": f"{self.final_reward.mean().item():.3f}"})  # type: ignore

    action_matrix = self._convert_final_action_to_matrix()
    return action_matrix, self.final_reward_cache
local_search

local_search(
    td: TensorDict,
    env: RL4COEnvBase,
    actions: Tensor,
    decoding_kwargs: dict,
) -> Tuple[Tensor, Tensor]

Perform local search on the actions and reward obtained.

Parameters:

  • td (TensorDict) –

    Current state of the problem.

  • env (RL4COEnvBase) –

    Environment representing the problem.

  • actions (Tensor) –

    Actions chosen by the algorithm.

Returns:

  • actions ( Tensor ) –

    The modified actions

  • reward ( Tensor ) –

    The modified reward

Source code in rl4co/models/zoo/deepaco/antsystem.py
def local_search(
    self, td: TensorDict, env: RL4COEnvBase, actions: Tensor, decoding_kwargs: dict
) -> Tuple[Tensor, Tensor]:
    """Perform local search on the actions and reward obtained.

    Args:
        td: Current state of the problem.
        env: Environment representing the problem.
        actions: Actions chosen by the algorithm.

    Returns:
        actions: The modified actions
        reward: The modified reward
    """
    device = td.device
    if env.name in ["tsp", "cvrp"]:
        # Convert to CPU in advance to minimize the overhead from device transfer
        td = td.detach().cpu()
        # TODO: avoid or generalize this, e.g., pre-compute for local search in each env
        td["distances"] = get_distance_matrix(td["locs"])
        actions = actions.detach().cpu()
    elif env.name in ["pctsp", "op"]:  # destroy & repair local search
        self.local_search_params.update(
            {
                "decoding_kwargs": decoding_kwargs,
                "heatmap": batchify(self.log_heuristic, self.n_ants),
            }
        )
    else:
        raise NotImplementedError(f"Local search not implemented for {env.name}")

    best_actions = env.local_search(td=td, actions=actions, **self.local_search_params)
    best_rewards = env.get_reward(td, best_actions)

    if self.use_nls:
        td_perturb = td.clone()
        td_perturb["distances"] = torch.tile(self.heuristic_dist, (self.n_ants, 1, 1))
        new_actions = best_actions.clone()

        for _ in range(self.n_perturbations):
            perturbed_actions = env.local_search(
                td=td_perturb, actions=new_actions, **self.perturbation_params
            )
            new_actions = env.local_search(
                td=td, actions=perturbed_actions, **self.local_search_params
            )
            new_rewards = env.get_reward(td, new_actions)

            improved_indices = new_rewards > best_rewards
            best_actions = env.replace_selected_actions(
                best_actions, new_actions, improved_indices
            )
            best_rewards[improved_indices] = new_rewards[improved_indices]

    best_actions = best_actions.to(device)
    best_rewards = best_rewards.to(device)

    return best_actions, best_rewards

Classes:

DeepACO

DeepACO(
    env: RL4COEnvBase,
    policy: Optional[DeepACOPolicy] = None,
    baseline: Union[REINFORCEBaseline, str] = "no",
    train_with_local_search: bool = True,
    ls_reward_aug_W: float = 0.95,
    policy_kwargs: dict = {},
    baseline_kwargs: dict = {},
    **kwargs
)

Bases: REINFORCE

Implements DeepACO: https://arxiv.org/abs/2309.14032.

Parameters:

  • env (RL4COEnvBase) –

    Environment to use for the algorithm

  • policy (Optional[DeepACOPolicy], default: None ) –

    Policy to use for the algorithm

  • baseline (Union[REINFORCEBaseline, str], default: 'no' ) –

    REINFORCE baseline. Defaults to "no" because the shared baseline is manually implemented.

  • train_with_local_search (bool, default: True ) –

    Whether to train with local search. Defaults to True.

  • ls_reward_aug_W (float, default: 0.95 ) –

    Coefficient to be used for the reward augmentation with the local search. Defaults to 0.95.

  • policy_kwargs (dict, default: {} ) –

    Keyword arguments for policy

  • baseline_kwargs (dict, default: {} ) –

    Keyword arguments for baseline

  • **kwargs

    Keyword arguments passed to the superclass

Methods:

Source code in rl4co/models/zoo/deepaco/model.py
def __init__(
    self,
    env: RL4COEnvBase,
    policy: Optional[DeepACOPolicy] = None,
    baseline: Union[REINFORCEBaseline, str] = "no",  # Shared baseline is manually implemented
    train_with_local_search: bool = True,
    ls_reward_aug_W: float = 0.95,
    policy_kwargs: dict = {},
    baseline_kwargs: dict = {},
    **kwargs,
):
    if policy is None:
        policy = DeepACOPolicy(
            env_name=env.name, train_with_local_search=train_with_local_search, **policy_kwargs
        )

    super().__init__(env, policy, baseline, baseline_kwargs, **kwargs)

    self.train_with_local_search = train_with_local_search
    self.ls_reward_aug_W = ls_reward_aug_W
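
Example (a hedged end-to-end sketch; the TSPEnv generator arguments and reset signature are assumptions that may differ across rl4co versions, and the import path follows the source location above):

from rl4co.envs import TSPEnv
from rl4co.models.zoo.deepaco.model import DeepACO

env = TSPEnv(generator_params={"num_loc": 20})  # assumed generator API
model = DeepACO(
    env,
    train_with_local_search=False,  # skip local search for a quick smoke test
    policy_kwargs={"n_ants": 10, "n_iterations": {"train": 1, "val": 2, "test": 5}},
)

td = env.reset(batch_size=[4])
out = model.policy(td, env, phase="test")  # runs the ACO loop at test time
print(out["reward"].mean())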

calculate_loss

calculate_loss(
    td: TensorDict,
    batch: TensorDict,
    policy_out: dict,
    reward: Optional[Tensor] = None,
    log_likelihood: Optional[Tensor] = None,
)

Calculate loss for REINFORCE algorithm.

Parameters:

  • td (TensorDict) –

    TensorDict containing the current state of the environment

  • batch (TensorDict) –

    Batch of data. This is used to get the extra loss terms, e.g., REINFORCE baseline

  • policy_out (dict) –

    Output of the policy network

  • reward (Optional[Tensor], default: None ) –

    Reward tensor. If None, it is taken from policy_out

  • log_likelihood (Optional[Tensor], default: None ) –

    Log-likelihood tensor. If None, it is taken from policy_out

Source code in rl4co/models/zoo/deepaco/model.py
def calculate_loss(
    self,
    td: TensorDict,
    batch: TensorDict,
    policy_out: dict,
    reward: Optional[torch.Tensor] = None,
    log_likelihood: Optional[torch.Tensor] = None,
):
    """Calculate loss for REINFORCE algorithm.

    Args:
        td: TensorDict containing the current state of the environment
        batch: Batch of data. This is used to get the extra loss terms, e.g., REINFORCE baseline
        policy_out: Output of the policy network
        reward: Reward tensor. If None, it is taken from `policy_out`
        log_likelihood: Log-likelihood tensor. If None, it is taken from `policy_out`
    """
    reward = policy_out["reward"]
    advantage = reward - reward.mean(dim=1, keepdim=True)  # Shared baseline

    if self.train_with_local_search:
        ls_reward = policy_out["ls_reward"]
        ls_advantage = ls_reward - ls_reward.mean(dim=1, keepdim=True)  # Shared baseline
        weighted_advantage = advantage * (1 - self.ls_reward_aug_W) + ls_advantage * self.ls_reward_aug_W
    else:
        weighted_advantage = advantage

    return -(weighted_advantage * policy_out["log_likelihood"]).mean()
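
Example (a self-contained sketch with dummy tensors, mirroring the shared baseline and reward augmentation above; it is not part of the library API):

import torch

reward = torch.randn(8, 20)                          # [batch, n_ants]
ls_reward = reward + 0.1 * torch.rand_like(reward)   # pretend local search improved rewards
log_likelihood = torch.randn(8, 20)
ls_reward_aug_W = 0.95

advantage = reward - reward.mean(dim=1, keepdim=True)           # shared baseline over ants
ls_advantage = ls_reward - ls_reward.mean(dim=1, keepdim=True)  # shared baseline over ants
weighted = advantage * (1 - ls_reward_aug_W) + ls_advantage * ls_reward_aug_W

loss = -(weighted * log_likelihood).mean()
print(loss.item())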

Classes:

DeepACOPolicy

DeepACOPolicy(
    encoder: Optional[NonAutoregressiveEncoder] = None,
    env_name: str = "tsp",
    temperature: float = 1.0,
    top_p: float = 0.0,
    top_k: int = 0,
    aco_class: Optional[Type[AntSystem]] = None,
    aco_kwargs: dict = {},
    train_with_local_search: bool = False,
    n_ants: Optional[Union[int, dict]] = None,
    n_iterations: Optional[Union[int, dict]] = None,
    start_node: Optional[int] = None,
    multistart: bool = False,
    k_sparse: Optional[int] = None,
    **encoder_kwargs
)

Bases: NonAutoregressivePolicy

Implements the DeepACO policy based on :class:NonAutoregressivePolicy. Introduced by Ye et al. (2023): https://arxiv.org/abs/2309.14032. This policy uses a Non-Autoregressive Graph Neural Network to generate heatmaps, which are then used to run Ant Colony Optimization (ACO) to construct solutions.

Parameters:

  • encoder (Optional[NonAutoregressiveEncoder], default: None ) –

    Encoder module. Can be passed by sub-classes

  • env_name (str, default: 'tsp' ) –

    Name of the environment used to initialize embeddings

  • temperature (float, default: 1.0 ) –

    Temperature for the softmax during decoding. Defaults to 1.0.

  • aco_class (Optional[Type[AntSystem]], default: None ) –

    Class representing the ACO algorithm to be used. Defaults to :class:AntSystem.

  • aco_kwargs (dict, default: {} ) –

    Additional arguments to be passed to the ACO algorithm.

  • train_with_local_search (bool, default: False ) –

    Whether to train with local search. Defaults to False.

  • n_ants (Optional[Union[int, dict]], default: None ) –

    Number of ants to be used in the ACO algorithm. Can be an integer or dictionary. Defaults to dict(train=30, val=48, test=48).

  • n_iterations (Optional[Union[int, dict]], default: None ) –

    Number of iterations to run the ACO algorithm. Can be an integer or dictionary. Defaults to dict(train=1, val=5, test=10).

  • encoder_kwargs

    Additional arguments to be passed to the encoder.

Methods:

  • forward

    Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions.

Source code in rl4co/models/zoo/deepaco/policy.py
def __init__(
    self,
    encoder: Optional[NonAutoregressiveEncoder] = None,
    env_name: str = "tsp",
    temperature: float = 1.0,
    top_p: float = 0.0,
    top_k: int = 0,
    aco_class: Optional[Type[AntSystem]] = None,
    aco_kwargs: dict = {},
    train_with_local_search: bool = False,
    n_ants: Optional[Union[int, dict]] = None,
    n_iterations: Optional[Union[int, dict]] = None,
    start_node: Optional[int] = None,
    multistart: bool = False,
    k_sparse: Optional[int] = None,
    **encoder_kwargs,
):
    if encoder is None:
        encoder_kwargs["k_sparse"] = k_sparse
        encoder = NARGNNEncoder(env_name=env_name, **encoder_kwargs)

    self.decode_type = "multistart_sampling" if env_name == "tsp" or multistart else "sampling"

    super().__init__(
        encoder=encoder,
        env_name=env_name,
        temperature=temperature,
        train_decode_type=self.decode_type,
        val_decode_type=self.decode_type,
        test_decode_type=self.decode_type,
    )

    self.default_decoding_kwargs = {}
    self.default_decoding_kwargs["select_best"] = False
    if k_sparse is not None:
        self.default_decoding_kwargs["top_k"] = k_sparse + (0 if env_name == "tsp" else 1)  # 1 for depot
    if "multistart" in self.decode_type:
        select_start_nodes_fn = partial(self.select_start_node_fn, start_node=start_node)
        self.default_decoding_kwargs.update(
            {"multistart": True, "select_start_nodes_fn": select_start_nodes_fn}
        )
    else:
        self.default_decoding_kwargs.update(
            {"multisample": True}
        )

    # For now, top_p and top_k are only used to filter logits (not passed to decoder)
    self.top_p = top_p
    self.top_k = top_k

    self.aco_class = AntSystem if aco_class is None else aco_class
    self.aco_kwargs = aco_kwargs
    self.train_with_local_search = train_with_local_search
    if train_with_local_search:
        assert self.aco_kwargs.get("use_local_search", False)
    self.n_ants = merge_with_defaults(n_ants, train=30, val=48, test=48)
    self.n_iterations = merge_with_defaults(n_iterations, train=1, val=5, test=10)
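
Example (a hedged construction sketch; values are illustrative and the import path follows the source location above):

from rl4co.models.zoo.deepaco.policy import DeepACOPolicy

policy = DeepACOPolicy(
    env_name="tsp",
    n_ants={"train": 20, "val": 32, "test": 64},      # per-phase override
    n_iterations={"train": 1, "val": 5, "test": 20},  # per-phase override
    k_sparse=10,                # keep only 10 edges per node (graph sparsification)
    aco_kwargs={"decay": 0.9},  # forwarded to the AntSystem constructor
)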

forward

forward(
    td_initial: TensorDict,
    env: Optional[Union[str, RL4COEnvBase]] = None,
    phase: str = "train",
    return_actions: bool = True,
    return_hidden: bool = True,
    actions=None,
    **decoding_kwargs
)

Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions. See :class:NonAutoregressivePolicy for more details during the training phase.

Source code in rl4co/models/zoo/deepaco/policy.py
def forward(
    self,
    td_initial: TensorDict,
    env: Optional[Union[str, RL4COEnvBase]] = None,
    phase: str = "train",
    return_actions: bool = True,
    return_hidden: bool = True,
    actions=None,
    **decoding_kwargs,
):
    """
    Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions.
    See :class:`NonAutoregressivePolicy` for more details during the training phase.
    """
    n_ants = self.n_ants[phase]

    decoding_kwargs.update(self.default_decoding_kwargs)
    decoding_kwargs.update(
        {"num_starts": n_ants} if "multistart" in self.decode_type else {"num_samples": n_ants}
    )

    # Instantiate environment if needed
    if (phase != "train" or self.train_with_local_search) and (env is None or isinstance(env, str)):
        env_name = self.env_name if env is None else env
        env = get_env(env_name)
    else:
        assert isinstance(env, RL4COEnvBase), "env must be an instance of RL4COEnvBase"

    if phase == "train":
        #  we just use the constructive policy
        outdict = super().forward(
            td_initial,
            env,
            phase=phase,
            calc_reward=True,
            return_actions=return_actions,
            return_hidden=return_hidden,
            actions=actions,
            **decoding_kwargs,
        )

        outdict["reward"] = unbatchify(outdict["reward"], n_ants)

        if self.train_with_local_search:
            heatmap = outdict["hidden"]
            # TODO: Refactor this so that we don't need to use the aco object
            aco = self.aco_class(heatmap, n_ants=n_ants, **self.aco_kwargs)
            _, ls_reward = aco.local_search(
                batchify(td_initial, n_ants), env, outdict["actions"], decoding_kwargs
            )
            outdict["ls_reward"] = unbatchify(ls_reward, n_ants)

        outdict["log_likelihood"] = unbatchify(outdict["log_likelihood"], n_ants)
        return outdict

    heatmap, _ = self.encoder(td_initial)
    heatmap /= self.temperature

    if self.top_k > 0:
        self.top_k = min(self.top_k, heatmap.size(-1))  # safety check
        heatmap = modify_logits_for_top_k_filtering(heatmap, self.top_k)

    if self.top_p > 0:
        assert self.top_p <= 1.0, "top-p should be in (0, 1]."
        heatmap = modify_logits_for_top_p_filtering(heatmap, self.top_p)

    aco = self.aco_class(heatmap, n_ants=n_ants, **self.aco_kwargs)
    actions, iter_rewards = aco.run(td_initial, env, self.n_iterations[phase], decoding_kwargs)

    out = {"reward": iter_rewards[self.n_iterations[phase] - 1]}
    out.update({f"reward_{i:03d}": iter_rewards[i] for i in range(self.n_iterations[phase])})
    if return_actions:
        out["actions"] = actions

    return out
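
Example (a hedged sketch continuing the policy construction above; the TSPEnv arguments and reset signature are assumptions):

from rl4co.envs import TSPEnv

env = TSPEnv(generator_params={"num_loc": 20})  # assumed generator API
td = env.reset(batch_size=[4])

out = policy(td, env, phase="test", return_actions=True)
print(out["reward"].mean())      # reward after the last ACO iteration
print(out["reward_000"].mean())  # reward after the first ACO iteration
print(out["actions"].shape)      # final action matrix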

NAR-GNN

Classes:

  • NARGNNPolicy

    Base Non-autoregressive policy for NCO construction methods.

NARGNNPolicy

NARGNNPolicy(
    encoder: Optional[NonAutoregressiveEncoder] = None,
    decoder: Optional[NonAutoregressiveDecoder] = None,
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[Module] = None,
    edge_embedding: Optional[Module] = None,
    graph_network: Optional[Module] = None,
    heatmap_generator: Optional[Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    train_decode_type: str = "multistart_sampling",
    val_decode_type: str = "multistart_greedy",
    test_decode_type: str = "multistart_greedy",
    **constructive_policy_kw
)

Bases: NonAutoregressivePolicy

Base non-autoregressive policy for NCO construction methods. This creates an NxN heatmap (i.e., a heuristic) over the N nodes that models the probability of moving from one node to another.

The policy performs the following steps:
  1. Encode the environment's initial state into node embeddings
  2. Decode (non-autoregressively) to construct the solution to the NCO problem

Warning

The effectiveness of the non-autoregressive approach can vary significantly across different problem types and configurations. It may require careful tuning of the model architecture and decoding strategy to achieve competitive results.

Parameters:

  • encoder (Optional[NonAutoregressiveEncoder], default: None ) –

    Encoder module. Can be passed by sub-classes

  • decoder (Optional[NonAutoregressiveDecoder], default: None ) –

    Decoder module. Note that this module defaults to the non-autoregressive decoder

  • embed_dim (int, default: 64 ) –

    Dimension of the embeddings

  • env_name (str, default: 'tsp' ) –

    Name of the environment used to initialize embeddings

  • init_embedding (Optional[Module], default: None ) –

    Model to use for the initial embedding. If None, use the default embedding for the environment

  • edge_embedding (Optional[Module], default: None ) –

    Model to use for the edge embedding. If None, use the default embedding for the environment

  • graph_network (Optional[Module], default: None ) –

    Model to use for the graph network. If None, use the default embedding for the environment

  • heatmap_generator (Optional[Module], default: None ) –

    Model to use for the heatmap generator. If None, use the default embedding for the environment

  • num_layers_heatmap_generator (int, default: 5 ) –

    Number of layers in the heatmap generator

  • num_layers_graph_encoder (int, default: 15 ) –

    Number of layers in the graph encoder

  • act_fn

    Activation function to use in the encoder

  • agg_fn

    Aggregation function to use in the encoder

  • linear_bias (bool, default: True ) –

    Whether to use bias in the encoder

  • train_decode_type (str, default: 'multistart_sampling' ) –

    Type of decoding during training

  • val_decode_type (str, default: 'multistart_greedy' ) –

    Type of decoding during validation

  • test_decode_type (str, default: 'multistart_greedy' ) –

    Type of decoding during testing

  • **constructive_policy_kw

    Unused keyword arguments

Source code in rl4co/models/zoo/nargnn/policy.py
def __init__(
    self,
    encoder: Optional[NonAutoregressiveEncoder] = None,
    decoder: Optional[NonAutoregressiveDecoder] = None,
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[nn.Module] = None,
    edge_embedding: Optional[nn.Module] = None,
    graph_network: Optional[nn.Module] = None,
    heatmap_generator: Optional[nn.Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    train_decode_type: str = "multistart_sampling",
    val_decode_type: str = "multistart_greedy",
    test_decode_type: str = "multistart_greedy",
    **constructive_policy_kw,
):
    if len(constructive_policy_kw) > 0:
        log.warn(f"Unused kwargs: {constructive_policy_kw}")

    if encoder is None:
        encoder = NARGNNEncoder(
            embed_dim=embed_dim,
            env_name=env_name,
            init_embedding=init_embedding,
            edge_embedding=edge_embedding,
            graph_network=graph_network,
            heatmap_generator=heatmap_generator,
            num_layers_heatmap_generator=num_layers_heatmap_generator,
            num_layers_graph_encoder=num_layers_graph_encoder,
            act_fn=act_fn,
            agg_fn=agg_fn,
            linear_bias=linear_bias,
        )

    # The decoder generates logits given the current td and heatmap
    if decoder is None:
        decoder = NonAutoregressiveDecoder()
    else:
        # check if the decoder has trainable parameters
        if any(p.requires_grad for p in decoder.parameters()):
            log.error(
                "The decoder contains trainable parameters. This should not happen in a non-autoregressive policy."
            )

    # Pass to constructive policy
    super(NARGNNPolicy, self).__init__(
        encoder=encoder,
        decoder=decoder,
        env_name=env_name,
        train_decode_type=train_decode_type,
        val_decode_type=val_decode_type,
        test_decode_type=test_decode_type,
        **constructive_policy_kw,
    )
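
Example (a hedged construction sketch; values are illustrative and the import path follows the source location above):

from rl4co.models.zoo.nargnn.policy import NARGNNPolicy

policy = NARGNNPolicy(
    embed_dim=64,
    env_name="tsp",
    num_layers_graph_encoder=12,     # depth of the GNN encoder
    num_layers_heatmap_generator=3,  # depth of the heatmap MLP
)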

Classes:

  • EdgeHeatmapGenerator

    MLP for converting edge embeddings to heatmaps.

  • NARGNNEncoder

    Anisotropic Graph Neural Network encoder with edge-gating mechanism as in Joshi et al. (2022), and used in DeepACO (Ye et al., 2023).

  • NARGNNNodeEncoder

    In this case, we just use the node embeddings from the graph without transforming them into a heatmap.

EdgeHeatmapGenerator

EdgeHeatmapGenerator(
    embed_dim: int,
    num_layers: int,
    act_fn: str | Callable = "silu",
    linear_bias: bool = True,
    undirected_graph: bool = True,
)

Bases: Module

MLP for converting edge embeddings to heatmaps.

Parameters:

  • embed_dim (int) –

    Dimension of the embeddings

  • num_layers (int) –

    The number of linear layers in the network.

  • act_fn (str | Callable, default: 'silu' ) –

    Activation function. Defaults to "silu".

  • linear_bias (bool, default: True ) –

    Use bias in linear layers. Defaults to True.

  • undirected_graph (bool, default: True ) –

    Whether the graph is undirected. Defaults to True.

Source code in rl4co/models/zoo/nargnn/encoder.py
def __init__(
    self,
    embed_dim: int,
    num_layers: int,
    act_fn: str | Callable = "silu",
    linear_bias: bool = True,
    undirected_graph: bool = True,
) -> None:
    super(EdgeHeatmapGenerator, self).__init__()

    self.linears = nn.ModuleList(
        [
            nn.Linear(embed_dim, embed_dim, bias=linear_bias)
            for _ in range(num_layers - 1)
        ]
    )
    self.output = nn.Linear(embed_dim, 1, bias=linear_bias)

    self.act = getattr(nn.functional, act_fn) if isinstance(act_fn, str) else act_fn

    self.undirected_graph = undirected_graph

NARGNNEncoder

NARGNNEncoder(
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[Module] = None,
    edge_embedding: Optional[Module] = None,
    graph_network: Optional[Module] = None,
    heatmap_generator: Optional[Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
)

Bases: NonAutoregressiveEncoder

Anisotropic Graph Neural Network encoder with the edge-gating mechanism of Joshi et al. (2022), as used in DeepACO (Ye et al., 2023). This creates an NxN heatmap (i.e., a heuristic) over the N nodes that models the probability of moving from one node to another. The model uses a multi-layer perceptron (MLP) to predict edge attributes directly from the input graph features; these are then transformed into a heatmap representation that facilitates decoding of the solution. The decoding process is managed by a specified strategy, which can range from simple greedy selection to more complex sampling methods.

Tip

This decoder's performance heavily relies on the ability of the MLP to capture the dependencies between different parts of the solution without the iterative refinement provided by autoregressive models. It is particularly useful in scenarios where the solution space can be effectively explored in a parallelized manner or when the solution components are largely independent.

Parameters:

  • embed_dim (int, default: 64 ) –

    Dimension of the node embeddings

  • env_name (str, default: 'tsp' ) –

    Name of the environment used to initialize embeddings

  • num_layers

    Number of layers in the encoder

  • init_embedding (Optional[Module], default: None ) –

    Model to use for the initial embedding. If None, use the default embedding for the environment

  • edge_embedding (Optional[Module], default: None ) –

    Model to use for the edge embedding. If None, use the default embedding for the environment

  • graph_network (Optional[Module], default: None ) –

    Model to use for the graph network. If None, use the default network for the environment

  • heatmap_generator (Optional[Module], default: None ) –

    Model to use for the heatmap generator. If None, use the default network for the environment

  • num_layers_heatmap_generator (int, default: 5 ) –

    Number of layers in the heatmap generator

  • num_layers_graph_encoder (int, default: 15 ) –

    Number of layers in the graph encoder

  • act_fn

    The activation function to use in each GNNLayer, see https://pytorch.org/docs/stable/nn.functional.html#non-linear-activation-functions for available options. Defaults to 'silu'.

  • agg_fn

    The aggregation function to use in each GNNLayer for pooling features. Options: 'add', 'mean', 'max'. Defaults to 'mean'.

  • linear_bias (bool, default: True ) –

    Use bias in linear layers. Defaults to True.

  • k_sparse (Optional[int], default: None ) –

    Number of edges to keep for each node. Defaults to None.

Methods:

  • forward

    Forward pass of the encoder.

Source code in rl4co/models/zoo/nargnn/encoder.py
def __init__(
    self,
    embed_dim: int = 64,
    env_name: str = "tsp",
    # TODO: pass network
    init_embedding: Optional[nn.Module] = None,
    edge_embedding: Optional[nn.Module] = None,
    graph_network: Optional[nn.Module] = None,
    heatmap_generator: Optional[nn.Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
):
    super(NonAutoregressiveEncoder, self).__init__()
    self.env_name = env_name

    self.init_embedding = (
        env_init_embedding(self.env_name, {"embed_dim": embed_dim})
        if init_embedding is None
        else init_embedding
    )

    self.edge_embedding = (
        env_edge_embedding(
            self.env_name, {"embed_dim": embed_dim, "k_sparse": k_sparse}
        )
        if edge_embedding is None
        else edge_embedding
    )

    self.graph_network = (
        GNNEncoder(
            embed_dim=embed_dim,
            num_layers=num_layers_graph_encoder,
            act_fn=act_fn,
            agg_fn=agg_fn,
        )
        if graph_network is None
        else graph_network
    )

    self.heatmap_generator = (
        EdgeHeatmapGenerator(
            embed_dim=embed_dim,
            num_layers=num_layers_heatmap_generator,
            linear_bias=linear_bias,
        )
        if heatmap_generator is None
        else heatmap_generator
    )

forward

forward(td: TensorDict)

Forward pass of the encoder. Transform the input TensorDict into the latent representation.

Source code in rl4co/models/zoo/nargnn/encoder.py
def forward(self, td: TensorDict):
    """Forward pass of the encoder.
    Transform the input TensorDict into the latent representation.
    """
    # Transfer to embedding space
    node_embed = self.init_embedding(td)
    graph = self.edge_embedding(td, node_embed)

    # Process embedding into graph
    # TODO: standardize?
    graph.x, graph.edge_attr = self.graph_network(
        graph.x, graph.edge_index, graph.edge_attr
    )

    # Generate heatmap logits
    heatmap_logits = self.heatmap_generator(graph)

    # Return latent representation (i.e. heatmap logits) and initial embeddings
    return heatmap_logits, node_embed
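
Example (a hedged sketch; the TSPEnv arguments and reset signature are assumptions, the import path follows the source location above, and torch_geometric is required by the underlying GNNEncoder):

from rl4co.envs import TSPEnv
from rl4co.models.zoo.nargnn.encoder import NARGNNEncoder

env = TSPEnv(generator_params={"num_loc": 20})  # assumed generator API
td = env.reset(batch_size=[4])

encoder = NARGNNEncoder(embed_dim=64, env_name="tsp", k_sparse=10)
heatmap_logits, node_embed = encoder(td)
print(heatmap_logits.shape)  # heatmap logits per instance (expected [4, 20, 20])
print(node_embed.shape)      # initial node embeddings ([4, 20, 64])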

NARGNNNodeEncoder

NARGNNNodeEncoder(
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[Module] = None,
    edge_embedding: Optional[Module] = None,
    graph_network: Optional[Module] = None,
    heatmap_generator: Optional[Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
)

Bases: NARGNNEncoder

In this case, we just use the node embeddings from the graph without transforming them into a heatmap.

Methods:

  • forward

    Forward pass of the encoder.

Source code in rl4co/models/zoo/nargnn/encoder.py
def __init__(
    self,
    embed_dim: int = 64,
    env_name: str = "tsp",
    # TODO: pass network
    init_embedding: Optional[nn.Module] = None,
    edge_embedding: Optional[nn.Module] = None,
    graph_network: Optional[nn.Module] = None,
    heatmap_generator: Optional[nn.Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
):
    super(NonAutoregressiveEncoder, self).__init__()
    self.env_name = env_name

    self.init_embedding = (
        env_init_embedding(self.env_name, {"embed_dim": embed_dim})
        if init_embedding is None
        else init_embedding
    )

    self.edge_embedding = (
        env_edge_embedding(
            self.env_name, {"embed_dim": embed_dim, "k_sparse": k_sparse}
        )
        if edge_embedding is None
        else edge_embedding
    )

    self.graph_network = (
        GNNEncoder(
            embed_dim=embed_dim,
            num_layers=num_layers_graph_encoder,
            act_fn=act_fn,
            agg_fn=agg_fn,
        )
        if graph_network is None
        else graph_network
    )

    self.heatmap_generator = (
        EdgeHeatmapGenerator(
            embed_dim=embed_dim,
            num_layers=num_layers_heatmap_generator,
            linear_bias=linear_bias,
        )
        if heatmap_generator is None
        else heatmap_generator
    )

forward

forward(td: TensorDict)

Forward pass of the encoder. Transform the input TensorDict into the latent representation.

Source code in rl4co/models/zoo/nargnn/encoder.py
def forward(self, td: TensorDict):
    # Transfer to embedding space
    node_embed = self.init_embedding(td)
    graph = self.edge_embedding(td, node_embed)

    # Process embedding into graph
    # TODO: standardize?
    graph.x, graph.edge_attr = self.graph_network(
        graph.x, graph.edge_index, graph.edge_attr
    )

    proc_embeds = graph.x
    batch_size = node_embed.shape[0]
    # reshape proc_embeds from [bs*n, h] to [bs, n, h]
    proc_embeds = proc_embeds.reshape(batch_size, -1, proc_embeds.shape[1])
    return proc_embeds, node_embed