Constructive NonAutoregressive

DeepACO

AntSystem

AntSystem(
    log_heuristic: Tensor,
    n_ants: int = 20,
    alpha: float = 1.0,
    beta: float = 1.0,
    decay: float = 0.95,
    Q: Optional[float] = None,
    temperature: float = 0.1,
    pheromone: Optional[Tensor] = None,
    require_logprobs: bool = False,
    use_local_search: bool = False,
    use_nls: bool = False,
    n_perturbations: int = 5,
    local_search_params: dict = {},
    perturbation_params: dict = {},
    start_node: Optional[int] = None,
)

Implements the Ant System algorithm: https://doi.org/10.1109/3477.484436.

Parameters:

  • log_heuristic (Tensor) –

    Logarithm of the heuristic matrix.

  • n_ants (int, default: 20 ) –

    Number of ants to be used in the algorithm. Defaults to 20.

  • alpha (float, default: 1.0 ) –

    Importance of pheromone in the decision-making process. Defaults to 1.0.

  • beta (float, default: 1.0 ) –

    Importance of heuristic information in the decision-making process. Defaults to 1.0.

  • decay (float, default: 0.95 ) –

    Rate at which pheromone evaporates. Should be between 0 and 1. Defaults to 0.95.

  • Q (Optional[float], default: None ) –

    Rate at which pheromone is deposited. Defaults to 1 / n_ants.

  • temperature (float, default: 0.1 ) –

    Temperature for the softmax during decoding. Defaults to 0.1.

  • pheromone (Optional[Tensor], default: None ) –

    Initial pheromone matrix. Defaults to torch.ones_like(log_heuristic).

  • require_logprobs (bool, default: False ) –

    Whether to require the log probability of actions. Defaults to False.

  • use_local_search (bool, default: False ) –

    Whether to use the local_search provided by the env. Defaults to False.

  • use_nls (bool, default: False ) –

    Whether to use neural-guided local search provided by the env. Defaults to False.

  • n_perturbations (int, default: 5 ) –

    Number of perturbations to be used for nls. Defaults to 5.

  • local_search_params (dict, default: {} ) –

    Arguments to be passed to the local_search.

  • perturbation_params (dict, default: {} ) –

    Arguments to be passed to the perturbation used for nls.

Source code in rl4co/models/zoo/deepaco/antsystem.py
def __init__(
    self,
    log_heuristic: Tensor,
    n_ants: int = 20,
    alpha: float = 1.0,
    beta: float = 1.0,
    decay: float = 0.95,
    Q: Optional[float] = None,
    temperature: float = 0.1,
    pheromone: Optional[Tensor] = None,
    require_logprobs: bool = False,
    use_local_search: bool = False,
    use_nls: bool = False,
    n_perturbations: int = 5,
    local_search_params: dict = {},
    perturbation_params: dict = {},
    start_node: Optional[int] = None,
):
    self.batch_size = log_heuristic.shape[0]
    self.n_ants = n_ants
    self.alpha = alpha
    self.beta = beta
    self.decay = decay
    self.Q = 1 / self.n_ants if Q is None else Q
    self.temperature = temperature

    self.log_heuristic = log_heuristic / self.temperature

    if pheromone is None:
        self.pheromone = torch.ones_like(log_heuristic)
        self.pheromone.fill_(0.0005)
    else:
        self.pheromone = pheromone

    self.final_actions = self.final_reward = None
    self.require_logprobs = require_logprobs
    self.all_records = []

    self.use_local_search = use_local_search
    assert not (use_nls and not use_local_search), "use_nls requires use_local_search"
    self.use_nls = use_nls
    self.n_perturbations = n_perturbations
    self.local_search_params = local_search_params
    self.perturbation_params = perturbation_params
    self.start_node = start_node

    self._batchindex = torch.arange(self.batch_size, device=log_heuristic.device)
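
Example (illustrative sketch, not part of the library docs): constructing an AntSystem directly from a batch of heatmap logits. The import path follows the source location above; shapes and values are placeholders.

import torch
from rl4co.models.zoo.deepaco.antsystem import AntSystem

batch_size, num_nodes = 2, 20
# in DeepACO these logits come from the NAR-GNN encoder; random values here
log_heuristic = torch.randn(batch_size, num_nodes, num_nodes)

aco = AntSystem(
    log_heuristic,
    n_ants=10,
    alpha=1.0,        # pheromone importance
    beta=1.0,         # heuristic importance
    decay=0.95,       # pheromone evaporation
    temperature=0.1,  # softmax temperature used during decoding
)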

run

run(
    td_initial: TensorDict,
    env: RL4COEnvBase,
    n_iterations: int,
) -> Tuple[TensorDict, Tensor, Tensor]

Run the Ant System algorithm for a specified number of iterations.

Parameters:

  • td_initial (TensorDict) –

    Initial state of the problem.

  • env (RL4COEnvBase) –

    Environment representing the problem.

  • n_iterations (int) –

    Number of iterations to run the algorithm.

Returns:

  • td ( TensorDict ) –

    The final state of the problem.

  • actions ( Tensor ) –

    The final actions chosen by the algorithm.

  • reward ( Tensor ) –

    The final reward achieved by the algorithm.

Source code in rl4co/models/zoo/deepaco/antsystem.py
def run(
    self,
    td_initial: TensorDict,
    env: RL4COEnvBase,
    n_iterations: int,
) -> Tuple[TensorDict, Tensor, Tensor]:
    """Run the Ant System algorithm for a specified number of iterations.

    Args:
        td_initial: Initial state of the problem.
        env: Environment representing the problem.
        n_iterations: Number of iterations to run the algorithm.

    Returns:
        td: The final state of the problem.
        actions: The final actions chosen by the algorithm.
        reward: The final reward achieved by the algorithm.
    """
    for _ in range(n_iterations):
        # reset environment
        td = td_initial.clone()
        self._one_step(td, env)

    action_matrix = self._convert_final_action_to_matrix()
    assert action_matrix is not None and self.final_reward is not None
    td, env = self._recreate_final_routes(td_initial, env, action_matrix)

    return td, action_matrix, self.final_reward
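
Illustrative usage (a sketch under assumptions, not from the docs): running the Ant System on a batch of TSP instances, assuming the standard RL4CO TSPEnv interface and using random logits in place of a learned heatmap.

import torch
from rl4co.envs import TSPEnv
from rl4co.models.zoo.deepaco.antsystem import AntSystem

env = TSPEnv(generator_params=dict(num_loc=20))
td = env.reset(batch_size=[2])

# random logits stand in for the NAR-GNN heatmap produced by DeepACOPolicy
aco = AntSystem(torch.randn(2, 20, 20), n_ants=10)
td_final, actions, reward = aco.run(td, env, n_iterations=5)
# actions: best tour found per instance; reward: its corresponding reward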

local_search

local_search(
    td: TensorDict, env: RL4COEnvBase, actions: Tensor
) -> Tuple[Tensor, Tensor]

Perform local search on the actions and reward obtained.

Parameters:

  • td (TensorDict) –

    Current state of the problem.

  • env (RL4COEnvBase) –

    Environment representing the problem.

  • actions (Tensor) –

    Actions chosen by the algorithm.

Returns:

  • actions ( Tensor ) –

    The modified actions

  • reward ( Tensor ) –

    The modified reward

Source code in rl4co/models/zoo/deepaco/antsystem.py
def local_search(
    self, td: TensorDict, env: RL4COEnvBase, actions: Tensor
) -> Tuple[Tensor, Tensor]:
    """Perform local search on the actions and reward obtained.

    Args:
        td: Current state of the problem.
        env: Environment representing the problem.
        actions: Actions chosen by the algorithm.

    Returns:
        actions: The modified actions
        reward: The modified reward
    """
    td_cpu = td.detach().cpu()  # Convert to CPU in advance to minimize the overhead from device transfer
    td_cpu["distances"] = get_distance_matrix(td_cpu["locs"])
    # TODO: avoid or generalize this, e.g., pre-compute for local search in each env
    actions = actions.detach().cpu()
    best_actions = env.local_search(td=td_cpu, actions=actions, **self.local_search_params)
    best_rewards = env.get_reward(td_cpu, best_actions)

    if self.use_nls:
        td_cpu_perturb = td_cpu.clone()
        td_cpu_perturb["distances"] = torch.tile(self.heuristic_dist, (self.n_ants, 1, 1))
        new_actions = best_actions.clone()

        for _ in range(self.n_perturbations):
            perturbed_actions = env.local_search(
                td=td_cpu_perturb, actions=new_actions, **self.perturbation_params
            )
            new_actions = env.local_search(td=td_cpu, actions=perturbed_actions, **self.local_search_params)
            new_rewards = env.get_reward(td_cpu, new_actions)

            improved_indices = new_rewards > best_rewards
            best_actions = env.replace_selected_actions(best_actions, new_actions, improved_indices)
            best_rewards[improved_indices] = new_rewards[improved_indices]

    best_actions = best_actions.to(td.device)
    best_rewards = best_rewards.to(td.device)

    return best_actions, best_rewards
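
A hedged sketch of calling local_search directly. It assumes a TSP-style environment that implements local_search and get_reward, assumes batchify is available in rl4co.utils.ops, and uses random permutations as stand-ins for tours actually sampled by the ants.

import torch
from rl4co.envs import TSPEnv
from rl4co.models.zoo.deepaco.antsystem import AntSystem
from rl4co.utils.ops import batchify

env = TSPEnv(generator_params=dict(num_loc=20))
td = env.reset(batch_size=[2])
aco = AntSystem(torch.randn(2, 20, 20), n_ants=10)

# stand-in tours: one random permutation per ant and instance
actions = torch.stack([torch.randperm(20) for _ in range(2 * aco.n_ants)])
improved_actions, improved_reward = aco.local_search(batchify(td, aco.n_ants), env, actions)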

get_logp

get_logp()

Get the log probability (logprobs) values recorded during the execution of the algorithm.

Returns:

  • results –

    Tuple containing the log probability values, actions chosen, rewards obtained, and mask values (if available).

Raises:

  • AssertionError – If require_logprobs is not enabled.

Source code in rl4co/models/zoo/deepaco/antsystem.py
def get_logp(self):
    """Get the log probability (logprobs) values recorded during the execution of the algorithm.

    Returns:
        results: Tuple containing the log probability values,
            actions chosen, rewards obtained, and mask values (if available).

    Raises:
        AssertionError: If `require_logp` is not enabled.
    """

    assert (
        self.require_logprobs
    ), "Please enable `require_logp` to record logprobs values"

    logprobs_list, actions_list, reward_list, mask_list = [], [], [], []

    for logprobs, actions, reward, mask in self.all_records:
        logprobs_list.append(logprobs)
        actions_list.append(actions)
        reward_list.append(reward)
        mask_list.append(mask)

    if mask_list[0] is None:
        mask_list = None
    else:
        mask_list = torch.stack(mask_list, 0)

    # reset records
    self.all_records = []

    return (
        torch.stack(logprobs_list, 0),
        torch.stack(actions_list, 0),
        torch.stack(reward_list, 0),
        mask_list,
    )
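
Illustrative sketch: recording log-probabilities requires require_logprobs=True at construction; the environment setup mirrors the earlier examples and is an assumption, not part of this page.

import torch
from rl4co.envs import TSPEnv
from rl4co.models.zoo.deepaco.antsystem import AntSystem

env = TSPEnv(generator_params=dict(num_loc=20))
td = env.reset(batch_size=[2])

aco = AntSystem(torch.randn(2, 20, 20), n_ants=10, require_logprobs=True)
aco.run(td, env, n_iterations=2)

logprobs, actions, rewards, masks = aco.get_logp()
# one record per iteration, stacked along dim 0; masks is None when unavailable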

DeepACO

DeepACO(
    env: RL4COEnvBase,
    policy: Optional[DeepACOPolicy] = None,
    baseline: Union[REINFORCEBaseline, str] = "no",
    policy_kwargs: dict = {},
    baseline_kwargs: dict = {},
    **kwargs
)

Bases: REINFORCE

Implements DeepACO: https://arxiv.org/abs/2309.14032.

Parameters:

  • env (RL4COEnvBase) –

    Environment to use for the algorithm

  • policy (Optional[DeepACOPolicy], default: None ) –

    Policy to use for the algorithm

  • baseline (Union[REINFORCEBaseline, str], default: 'no' ) –

    REINFORCE baseline. Defaults to "no" (no baseline)

  • policy_kwargs (dict, default: {} ) –

    Keyword arguments for policy

  • baseline_kwargs (dict, default: {} ) –

    Keyword arguments for baseline

  • **kwargs

    Keyword arguments passed to the superclass

Source code in rl4co/models/zoo/deepaco/model.py
def __init__(
    self,
    env: RL4COEnvBase,
    policy: Optional[DeepACOPolicy] = None,
    baseline: Union[REINFORCEBaseline, str] = "no",
    policy_kwargs: dict = {},
    baseline_kwargs: dict = {},
    **kwargs,
):
    if policy is None:
        policy = DeepACOPolicy(env_name=env.name, **policy_kwargs)

    super().__init__(env, policy, baseline, baseline_kwargs, **kwargs)
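
A hedged end-to-end sketch (the import paths and trainer usage are assumptions based on the usual RL4CO interfaces, not part of this page): training DeepACO on TSP with the Lightning-based RL4COTrainer.

from rl4co.envs import TSPEnv
from rl4co.models import DeepACO
from rl4co.utils.trainer import RL4COTrainer

env = TSPEnv(generator_params=dict(num_loc=50))
model = DeepACO(env, baseline="no", policy_kwargs=dict(n_ants=20))

trainer = RL4COTrainer(max_epochs=1, devices=1)
trainer.fit(model)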

DeepACOPolicy

DeepACOPolicy(
    encoder: Optional[NonAutoregressiveEncoder] = None,
    env_name: str = "tsp",
    temperature: float = 1.0,
    aco_class: Optional[Type[AntSystem]] = None,
    aco_kwargs: dict = {},
    train_with_local_search: bool = True,
    n_ants: Optional[Union[int, dict]] = None,
    n_iterations: Optional[Union[int, dict]] = None,
    ls_reward_aug_W: float = 0.95,
    **encoder_kwargs
)

Bases: NonAutoregressivePolicy

Implements the DeepACO policy based on :class:NonAutoregressivePolicy, introduced by Ye et al. (2023): https://arxiv.org/abs/2309.14032. This policy uses a non-autoregressive graph neural network to generate heatmaps, which are then used to run Ant Colony Optimization (ACO) to construct solutions.

Parameters:

  • encoder (Optional[NonAutoregressiveEncoder], default: None ) –

    Encoder module. Can be passed by sub-classes

  • env_name (str, default: 'tsp' ) –

    Name of the environment used to initialize embeddings

  • temperature (float, default: 1.0 ) –

    Temperature for the softmax during decoding. Defaults to 1.0.

  • aco_class (Optional[Type[AntSystem]], default: None ) –

    Class representing the ACO algorithm to be used. Defaults to :class:AntSystem.

  • aco_kwargs (dict, default: {} ) –

    Additional arguments to be passed to the ACO algorithm.

  • train_with_local_search (bool, default: True ) –

    Whether to augment the training rewards with the environment's local search. Defaults to True.

  • n_ants (Optional[Union[int, dict]], default: None ) –

    Number of ants to be used in the ACO algorithm. Can be an integer or dictionary. Defaults to dict(train=30, val=48, test=48).

  • n_iterations (Optional[Union[int, dict]], default: None ) –

    Number of iterations to run the ACO algorithm. Can be an integer or dictionary. Defaults to dict(train=1, val=5, test=10).

  • ls_reward_aug_W (float, default: 0.95 ) –

    Coefficient to be used for the reward augmentation with the local search. Defaults to 0.95.

  • encoder_kwargs

    Additional arguments to be passed to the encoder.

Source code in rl4co/models/zoo/deepaco/policy.py
def __init__(
    self,
    encoder: Optional[NonAutoregressiveEncoder] = None,
    env_name: str = "tsp",
    temperature: float = 1.0,
    aco_class: Optional[Type[AntSystem]] = None,
    aco_kwargs: dict = {},
    train_with_local_search: bool = True,
    n_ants: Optional[Union[int, dict]] = None,
    n_iterations: Optional[Union[int, dict]] = None,
    ls_reward_aug_W: float = 0.95,
    **encoder_kwargs,
):
    if encoder is None:
        encoder = NARGNNEncoder(**encoder_kwargs)

    super(DeepACOPolicy, self).__init__(
        encoder=encoder,
        env_name=env_name,
        temperature=temperature,
        train_decode_type="multistart_sampling",
        val_decode_type="multistart_sampling",
        test_decode_type="multistart_sampling",
    )

    self.aco_class = AntSystem if aco_class is None else aco_class
    self.aco_kwargs = aco_kwargs
    self.train_with_local_search = train_with_local_search
    self.n_ants = merge_with_defaults(n_ants, train=30, val=48, test=48)
    self.n_iterations = merge_with_defaults(n_iterations, train=1, val=5, test=10)
    self.ls_reward_aug_W = ls_reward_aug_W
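
An illustrative configuration sketch (the import path assumes the deepaco package re-exports the policy; otherwise import from rl4co.models.zoo.deepaco.policy). Integers apply to all phases, while dictionaries set per-phase values.

from rl4co.models.zoo.deepaco import DeepACOPolicy

policy = DeepACOPolicy(
    env_name="tsp",
    n_ants={"train": 30, "val": 48, "test": 48},
    n_iterations={"train": 1, "val": 5, "test": 10},
    aco_kwargs=dict(use_local_search=True),  # forwarded to the AntSystem constructor
)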

forward

forward(
    td_initial: TensorDict,
    env: Optional[Union[str, RL4COEnvBase]] = None,
    calc_reward: bool = True,
    phase: str = "train",
    actions=None,
    return_actions: bool = True,
    return_hidden: bool = True,
    **kwargs
)

Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions. See :class:NonAutoregressivePolicy for more details during the training phase.

Source code in rl4co/models/zoo/deepaco/policy.py
def forward(
    self,
    td_initial: TensorDict,
    env: Optional[Union[str, RL4COEnvBase]] = None,
    calc_reward: bool = True,
    phase: str = "train",
    actions=None,
    return_actions: bool = True,
    return_hidden: bool = True,
    **kwargs,
):
    """
    Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions.
    See :class:`NonAutoregressivePolicy` for more details during the training phase.
    """
    n_ants = self.n_ants[phase]
    # Instantiate environment if needed
    if (phase != "train" or self.ls_reward_aug_W > 0) and (env is None or isinstance(env, str)):
        env_name = self.env_name if env is None else env
        env = get_env(env_name)

    if phase == "train":
        select_start_nodes_fn = partial(
            self.aco_class.select_start_node_fn, start_node=self.aco_kwargs.get("start_node", None)
        )
        kwargs.update({"select_start_nodes_fn": select_start_nodes_fn})
        #  we just use the constructive policy
        outdict = super().forward(
            td_initial,
            env,
            phase=phase,
            decode_type="multistart_sampling",
            calc_reward=calc_reward,
            num_starts=n_ants,
            actions=actions,
            return_actions=return_actions,
            return_hidden=return_hidden,
            **kwargs,
        )

        # manually compute the advantage
        reward = unbatchify(outdict["reward"], n_ants)
        advantage = reward - reward.mean(dim=1, keepdim=True)

        if self.ls_reward_aug_W > 0 and self.train_with_local_search:
            heatmap_logits = outdict["hidden"]
            aco = self.aco_class(
                heatmap_logits,
                n_ants=n_ants,
                temperature=self.aco_kwargs.get("temperature", self.temperature),
                **self.aco_kwargs,
            )

            actions = outdict["actions"]
            _, ls_reward = aco.local_search(batchify(td_initial, n_ants), env, actions)

            ls_reward = unbatchify(ls_reward, n_ants)
            ls_advantage = ls_reward - ls_reward.mean(dim=1, keepdim=True)
            advantage = advantage * (1 - self.ls_reward_aug_W) + ls_advantage * self.ls_reward_aug_W

        outdict["advantage"] = advantage
        outdict["log_likelihood"] = unbatchify(outdict["log_likelihood"], n_ants)

        return outdict

    heatmap_logits, _ = self.encoder(td_initial)

    aco = self.aco_class(
        heatmap_logits,
        n_ants=self.n_ants[phase],
        temperature=self.aco_kwargs.get("temperature", self.temperature),
        **self.aco_kwargs,
    )
    td, actions, reward = aco.run(td_initial, env, self.n_iterations[phase])

    out = {}
    if calc_reward:
        out["reward"] = reward
    if return_actions:
        out["actions"] = actions

    return out
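
Illustrative inference sketch (assumptions: the RL4CO TSPEnv and the package-level import of DeepACOPolicy): during testing the policy encodes the instances once and lets the ACO construct solutions.

from rl4co.envs import TSPEnv
from rl4co.models.zoo.deepaco import DeepACOPolicy

env = TSPEnv(generator_params=dict(num_loc=20))
td = env.reset(batch_size=[4])

policy = DeepACOPolicy(env_name="tsp")
out = policy(td, env, phase="test")  # encodes once, then runs the ACO internally
print(out["reward"].shape, out["actions"].shape)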

NAR-GNN

NARGNNPolicy

NARGNNPolicy(
    encoder: Optional[NonAutoregressiveEncoder] = None,
    decoder: Optional[NonAutoregressiveDecoder] = None,
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[Module] = None,
    edge_embedding: Optional[Module] = None,
    graph_network: Optional[Module] = None,
    heatmap_generator: Optional[Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    train_decode_type: str = "multistart_sampling",
    val_decode_type: str = "multistart_greedy",
    test_decode_type: str = "multistart_greedy",
    **constructive_policy_kw
)

Bases: NonAutoregressivePolicy

Base non-autoregressive policy for NCO construction methods. This creates an NxN heatmap for N nodes (i.e., a heuristic) that models the probability of going from one node to another, for all pairs of nodes.

The policy performs the following steps:
  1. Encode the environment's initial state into node embeddings
  2. Decode (non-autoregressively) to construct the solution to the NCO problem

Warning

The effectiveness of the non-autoregressive approach can vary significantly across different problem types and configurations. It may require careful tuning of the model architecture and decoding strategy to achieve competitive results.

Parameters:

  • encoder (Optional[NonAutoregressiveEncoder], default: None ) –

    Encoder module. Can be passed by sub-classes

  • decoder (Optional[NonAutoregressiveDecoder], default: None ) –

    Decoder module. Note that this module defaults to the non-autoregressive decoder

  • embed_dim (int, default: 64 ) –

    Dimension of the embeddings

  • env_name (str, default: 'tsp' ) –

    Name of the environment used to initialize embeddings

  • init_embedding (Optional[Module], default: None ) –

    Model to use for the initial embedding. If None, use the default embedding for the environment

  • edge_embedding (Optional[Module], default: None ) –

    Model to use for the edge embedding. If None, use the default embedding for the environment

  • graph_network (Optional[Module], default: None ) –

    Model to use for the graph network. If None, use the default network for the environment

  • heatmap_generator (Optional[Module], default: None ) –

    Model to use for the heatmap generator. If None, use the default generator for the environment

  • num_layers_heatmap_generator (int, default: 5 ) –

    Number of layers in the heatmap generator

  • num_layers_graph_encoder (int, default: 15 ) –

    Number of layers in the graph encoder

  • act_fn

    Activation function to use in the encoder

  • agg_fn

    Aggregation function to use in the encoder

  • linear_bias (bool, default: True ) –

    Whether to use bias in the encoder

  • train_decode_type (str, default: 'multistart_sampling' ) –

    Type of decoding during training

  • val_decode_type (str, default: 'multistart_greedy' ) –

    Type of decoding during validation

  • test_decode_type (str, default: 'multistart_greedy' ) –

    Type of decoding during testing

  • **constructive_policy_kw

    Unused keyword arguments

Source code in rl4co/models/zoo/nargnn/policy.py
def __init__(
    self,
    encoder: Optional[NonAutoregressiveEncoder] = None,
    decoder: Optional[NonAutoregressiveDecoder] = None,
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[nn.Module] = None,
    edge_embedding: Optional[nn.Module] = None,
    graph_network: Optional[nn.Module] = None,
    heatmap_generator: Optional[nn.Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    train_decode_type: str = "multistart_sampling",
    val_decode_type: str = "multistart_greedy",
    test_decode_type: str = "multistart_greedy",
    **constructive_policy_kw,
):
    if len(constructive_policy_kw) > 0:
        log.warn(f"Unused kwargs: {constructive_policy_kw}")

    if encoder is None:
        encoder = NARGNNEncoder(
            embed_dim=embed_dim,
            env_name=env_name,
            init_embedding=init_embedding,
            edge_embedding=edge_embedding,
            graph_network=graph_network,
            heatmap_generator=heatmap_generator,
            num_layers_heatmap_generator=num_layers_heatmap_generator,
            num_layers_graph_encoder=num_layers_graph_encoder,
            act_fn=act_fn,
            agg_fn=agg_fn,
            linear_bias=linear_bias,
        )

    # The decoder generates logits given the current td and heatmap
    if decoder is None:
        decoder = NonAutoregressiveDecoder()
    else:
        # check if the decoder has trainable parameters
        if any(p.requires_grad for p in decoder.parameters()):
            log.error(
                "The decoder contains trainable parameters. This should not happen in a non-autoregressive policy."
            )

    # Pass to constructive policy
    super(NARGNNPolicy, self).__init__(
        encoder=encoder,
        decoder=decoder,
        env_name=env_name,
        train_decode_type=train_decode_type,
        val_decode_type=val_decode_type,
        test_decode_type=test_decode_type,
        **constructive_policy_kw,
    )
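
A minimal construction sketch (the import path assumes the nargnn package re-exports the policy; otherwise import from rl4co.models.zoo.nargnn.policy).

from rl4co.models.zoo.nargnn import NARGNNPolicy

policy = NARGNNPolicy(
    embed_dim=64,
    env_name="tsp",
    num_layers_graph_encoder=15,
    num_layers_heatmap_generator=5,
)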

EdgeHeatmapGenerator

EdgeHeatmapGenerator(
    embed_dim: int,
    num_layers: int,
    act_fn: Union[str, Callable] = "silu",
    linear_bias: bool = True,
    undirected_graph: bool = True,
)

Bases: Module

MLP for converting edge embeddings to heatmaps.

Parameters:

  • embed_dim (int) –

    Dimension of the embeddings

  • num_layers (int) –

    The number of linear layers in the network.

  • act_fn (Union[str, Callable], default: 'silu' ) –

    Activation function. Defaults to "silu".

  • linear_bias (bool, default: True ) –

    Use bias in linear layers. Defaults to True.

  • undirected_graph (bool, default: True ) –

    Whether the graph is undirected. Defaults to True.

Source code in rl4co/models/zoo/nargnn/encoder.py
def __init__(
    self,
    embed_dim: int,
    num_layers: int,
    act_fn: Union[str, Callable] = "silu",
    linear_bias: bool = True,
    undirected_graph: bool = True,
) -> None:
    super(EdgeHeatmapGenerator, self).__init__()

    self.linears = nn.ModuleList(
        [
            nn.Linear(embed_dim, embed_dim, bias=linear_bias)
            for _ in range(num_layers - 1)
        ]
    )
    self.output = nn.Linear(embed_dim, 1, bias=linear_bias)

    self.act = getattr(nn.functional, act_fn) if isinstance(act_fn, str) else act_fn

    self.undirected_graph = undirected_graph
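
A minimal sketch of constructing the generator on its own; in practice it is created internally by NARGNNEncoder and applied to a batched, edge-attributed graph.

from rl4co.models.zoo.nargnn.encoder import EdgeHeatmapGenerator

# a 5-layer MLP head mapping edge embeddings of size 64 to a single heatmap logit per edge
heatmap_head = EdgeHeatmapGenerator(embed_dim=64, num_layers=5, undirected_graph=True)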

NARGNNEncoder

NARGNNEncoder(
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[Module] = None,
    edge_embedding: Optional[Module] = None,
    graph_network: Optional[Module] = None,
    heatmap_generator: Optional[Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
)

Bases: NonAutoregressiveEncoder

Anisotropic Graph Neural Network encoder with edge-gating mechanism as in Joshi et al. (2022), and used in DeepACO (Ye et al., 2023). This creates a heatmap of NxN for N nodes (i.e., heuristic) that models the probability to go from one node to another for all nodes. This model utilizes a multi-layer perceptron (MLP) approach to predict edge attributes directly from the input graph features, which are then transformed into a heatmap representation to facilitate the decoding of the solution. The decoding process is managed by a specified strategy which could vary from simple greedy selection to more complex sampling methods.

Tip

This decoder's performance heavily relies on the ability of the MLP to capture the dependencies between different parts of the solution without the iterative refinement provided by autoregressive models. It is particularly useful in scenarios where the solution space can be effectively explored in a parallelized manner or when the solution components are largely independent.

Parameters:

  • embed_dim (int, default: 64 ) –

    Dimension of the node embeddings

  • env_name (str, default: 'tsp' ) –

    Name of the environment used to initialize embeddings

  • num_layers

    Number of layers in the encoder

  • init_embedding (Optional[Module], default: None ) –

    Model to use for the initial embedding. If None, use the default embedding for the environment

  • edge_embedding (Optional[Module], default: None ) –

    Model to use for the edge embedding. If None, use the default embedding for the environment

  • graph_network (Optional[Module], default: None ) –

    Model to use for the graph network. If None, use the default network for the environment

  • heatmap_generator (Optional[Module], default: None ) –

    Model to use for the heatmap generator. If None, use the default network for the environment

  • num_layers_heatmap_generator (int, default: 5 ) –

    Number of layers in the heatmap generator

  • num_layers_graph_encoder (int, default: 15 ) –

    Number of layers in the graph encoder

  • act_fn

    The activation function to use in each GNNLayer, see https://pytorch.org/docs/stable/nn.functional.html#non-linear-activation-functions for available options. Defaults to 'silu'.

  • agg_fn

    The aggregation function to use in each GNNLayer for pooling features. Options: 'add', 'mean', 'max'. Defaults to 'mean'.

  • linear_bias (bool, default: True ) –

    Use bias in linear layers. Defaults to True.

  • k_sparse (Optional[int], default: None ) –

    Number of edges to keep for each node. Defaults to None.

Source code in rl4co/models/zoo/nargnn/encoder.py
def __init__(
    self,
    embed_dim: int = 64,
    env_name: str = "tsp",
    # TODO: pass network
    init_embedding: Optional[nn.Module] = None,
    edge_embedding: Optional[nn.Module] = None,
    graph_network: Optional[nn.Module] = None,
    heatmap_generator: Optional[nn.Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
):
    super(NonAutoregressiveEncoder, self).__init__()
    self.env_name = env_name

    self.init_embedding = (
        env_init_embedding(self.env_name, {"embed_dim": embed_dim})
        if init_embedding is None
        else init_embedding
    )

    self.edge_embedding = (
        env_edge_embedding(self.env_name, {"embed_dim": embed_dim, "k_sparse": k_sparse})
        if edge_embedding is None
        else edge_embedding
    )

    self.graph_network = (
        GNNEncoder(
            embed_dim=embed_dim,
            num_layers=num_layers_graph_encoder,
            act_fn=act_fn,
            agg_fn=agg_fn,
        )
        if graph_network is None
        else graph_network
    )

    self.heatmap_generator = (
        EdgeHeatmapGenerator(
            embed_dim=embed_dim,
            num_layers=num_layers_heatmap_generator,
            linear_bias=linear_bias,
        )
        if heatmap_generator is None
        else heatmap_generator
    )

forward

forward(td: TensorDict)

Forward pass of the encoder. Transform the input TensorDict into the latent representation.

Source code in rl4co/models/zoo/nargnn/encoder.py
def forward(self, td: TensorDict):
    """Forward pass of the encoder.
    Transform the input TensorDict into the latent representation.
    """
    # Transfer to embedding space
    node_embed = self.init_embedding(td)
    graph = self.edge_embedding(td, node_embed)

    # Process embedding into graph
    # TODO: standardize?
    graph.x, graph.edge_attr = self.graph_network(
        graph.x, graph.edge_index, graph.edge_attr
    )

    # Generate heatmap logits
    heatmap_logits = self.heatmap_generator(graph)

    # Return latent representation (i.e. heatmap logits) and initial embeddings
    return heatmap_logits, node_embed
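
An illustrative sketch (assuming torch_geometric is installed and the TSP embeddings registered in RL4CO are used): the encoder maps a TensorDict of instances to heatmap logits and initial node embeddings.

from rl4co.envs import TSPEnv
from rl4co.models.zoo.nargnn.encoder import NARGNNEncoder

env = TSPEnv(generator_params=dict(num_loc=20))
td = env.reset(batch_size=[2])

encoder = NARGNNEncoder(embed_dim=64, env_name="tsp", k_sparse=10)
heatmap_logits, node_embed = encoder(td)  # heatmap logits per edge plus initial node embeddings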

NARGNNNodeEncoder

NARGNNNodeEncoder(
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[Module] = None,
    edge_embedding: Optional[Module] = None,
    graph_network: Optional[Module] = None,
    heatmap_generator: Optional[Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
)

Bases: NARGNNEncoder

In this case, we just use the node embeddings from the graph without transforming them into a heatmap.

Source code in rl4co/models/zoo/nargnn/encoder.py
def __init__(
    self,
    embed_dim: int = 64,
    env_name: str = "tsp",
    # TODO: pass network
    init_embedding: Optional[nn.Module] = None,
    edge_embedding: Optional[nn.Module] = None,
    graph_network: Optional[nn.Module] = None,
    heatmap_generator: Optional[nn.Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
):
    super(NonAutoregressiveEncoder, self).__init__()
    self.env_name = env_name

    self.init_embedding = (
        env_init_embedding(self.env_name, {"embed_dim": embed_dim})
        if init_embedding is None
        else init_embedding
    )

    self.edge_embedding = (
        env_edge_embedding(self.env_name, {"embed_dim": embed_dim, "k_sparse": k_sparse})
        if edge_embedding is None
        else edge_embedding
    )

    self.graph_network = (
        GNNEncoder(
            embed_dim=embed_dim,
            num_layers=num_layers_graph_encoder,
            act_fn=act_fn,
            agg_fn=agg_fn,
        )
        if graph_network is None
        else graph_network
    )

    self.heatmap_generator = (
        EdgeHeatmapGenerator(
            embed_dim=embed_dim,
            num_layers=num_layers_heatmap_generator,
            linear_bias=linear_bias,
        )
        if heatmap_generator is None
        else heatmap_generator
    )

forward

forward(td: TensorDict)

Forward pass of the encoder. Transform the input TensorDict into the latent representation.

Source code in rl4co/models/zoo/nargnn/encoder.py
def forward(self, td: TensorDict):
    # Transfer to embedding space
    node_embed = self.init_embedding(td)
    graph = self.edge_embedding(td, node_embed)

    # Process embedding into graph
    # TODO: standardize?
    graph.x, graph.edge_attr = self.graph_network(
        graph.x, graph.edge_index, graph.edge_attr
    )

    proc_embeds = graph.x
    batch_size = node_embed.shape[0]
    # reshape proc_embeds from [bs*n, h] to [bs, n, h]
    proc_embeds = proc_embeds.reshape(batch_size, -1, proc_embeds.shape[1])
    return proc_embeds, node_embed