Constructive Non-Autoregressive

DeepACO

Classes:

AntSystem

AntSystem(
    log_heuristic: Tensor,
    n_ants: int = 20,
    alpha: float = 1.0,
    beta: float = 1.0,
    decay: float = 0.95,
    Q: Optional[float] = None,
    pheromone: Optional[Tensor | int] = None,
    use_local_search: bool = False,
    use_nls: bool = False,
    n_perturbations: int = 1,
    local_search_params: dict = {},
    perturbation_params: dict = {},
)

Implements the Ant System algorithm: https://doi.org/10.1109/3477.484436.

Parameters:

  • log_heuristic (Tensor) –

    Logarithm of the heuristic matrix.

  • n_ants (int, default: 20 ) –

    Number of ants to be used in the algorithm. Defaults to 20.

  • alpha (float, default: 1.0 ) –

    Importance of pheromone in the decision-making process. Defaults to 1.0.

  • beta (float, default: 1.0 ) –

    Importance of heuristic information in the decision-making process. Defaults to 1.0.

  • decay (float, default: 0.95 ) –

    Rate at which pheromone evaporates. Should be between 0 and 1. Defaults to 0.95.

  • Q (Optional[float], default: None ) –

    Rate at which pheromone is deposited. Defaults to 1 / (n_ants * decay).

  • pheromone (Optional[Tensor | int], default: None ) –

    Initial pheromone matrix. Defaults to torch.ones_like(log_heuristic).

  • use_local_search (bool, default: False ) –

    Whether to use the local_search provided by the env. Defaults to False.

  • use_nls (bool, default: False ) –

    Whether to use neural-guided local search provided by the env. Defaults to False.

  • n_perturbations (int, default: 1 ) –

    Number of perturbations to be used for nls. Defaults to 1.

  • local_search_params (dict, default: {} ) –

    Arguments to be passed to the local_search.

  • perturbation_params (dict, default: {} ) –

    Arguments to be passed to the perturbation used for nls.

Methods:

  • run

    Run the Ant System algorithm for a specified number of iterations.

  • local_search

    Perform local search on the actions and reward obtained.

Source code in rl4co/models/zoo/deepaco/antsystem.py
def __init__(
    self,
    log_heuristic: Tensor,
    n_ants: int = 20,
    alpha: float = 1.0,
    beta: float = 1.0,
    decay: float = 0.95,
    Q: Optional[float] = None,
    pheromone: Optional[Tensor | int] = None,
    use_local_search: bool = False,
    use_nls: bool = False,
    n_perturbations: int = 1,
    local_search_params: dict = {},
    perturbation_params: dict = {},
):
    self.batch_size = log_heuristic.shape[0]
    self.n_ants = n_ants
    self.alpha = alpha
    self.beta = beta
    self.decay = decay
    self.Q = 1 / self.n_ants / self.decay if Q is None else Q

    self.log_heuristic = log_heuristic

    if pheromone is None or isinstance(pheromone, int):
        self.pheromone = torch.ones_like(log_heuristic)
        self.pheromone.fill_(pheromone if isinstance(pheromone, int) else 1)
    else:
        assert pheromone.shape == log_heuristic.shape
        self.pheromone = pheromone

    self.final_actions = self.final_reward = None
    self.final_reward_cache: dict = {}

    self.use_local_search = use_local_search
    assert not (use_nls and not use_local_search), "use_nls requires use_local_search"
    self.use_nls = use_nls
    self.n_perturbations = n_perturbations
    self.local_search_params = local_search_params.copy()  # needs to be copied to avoid side effects
    self.perturbation_params = perturbation_params.copy()

    self._batchindex = torch.arange(self.batch_size, device=log_heuristic.device)
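
Example (a hedged sketch with illustrative values; the import path follows the "Source code" location above):

import torch

from rl4co.models.zoo.deepaco.antsystem import AntSystem

# Random log-heuristic for a batch of 4 instances with 20 nodes each
log_heuristic = torch.rand(4, 20, 20).log()  # [batch, n_nodes, n_nodes]
aco = AntSystem(log_heuristic, n_ants=10, decay=0.9)

print(aco.pheromone.shape)  # same shape as log_heuristic, initialized to ones
print(aco.Q)                # defaults to 1 / (n_ants * decay) when Q is None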

run

run(
    td_initial: TensorDict,
    env: RL4COEnvBase,
    n_iterations: int,
    decoding_kwargs: dict,
    disable_tqdm: bool = True,
) -> Tuple[Tensor, dict[int, Tensor]]

Run the Ant System algorithm for a specified number of iterations.

Parameters:

  • td_initial (TensorDict) –

    Initial state of the problem.

  • env (RL4COEnvBase) –

    Environment representing the problem.

  • n_iterations (int) –

    Number of iterations to run the algorithm.

  • decoding_kwargs (dict) –

    Keyword arguments for decoding strategy.

  • disable_tqdm (bool, default: True ) –

    Whether to disable the tqdm progress bar. Defaults to True.

Returns:

  • actions ( Tensor ) –

    The final actions chosen by the algorithm, converted to an action matrix.

  • reward_cache ( dict[int, Tensor] ) –

    The reward achieved after each iteration, keyed by iteration index.

Source code in rl4co/models/zoo/deepaco/antsystem.py
def run(
    self,
    td_initial: TensorDict,
    env: RL4COEnvBase,
    n_iterations: int,
    decoding_kwargs: dict,
    disable_tqdm: bool = True,
) -> Tuple[Tensor, dict[int, Tensor]]:
    """Run the Ant System algorithm for a specified number of iterations.

    Args:
        td_initial: Initial state of the problem.
        env: Environment representing the problem.
        n_iterations: Number of iterations to run the algorithm.
        decoding_kwargs: Keyword arguments for decoding strategy.
        disable_tqdm: Whether to disable the tqdm progress bar. Defaults to True.

    Returns:
        actions: The final actions chosen by the algorithm, converted to an action matrix.
        reward_cache: The reward achieved after each iteration, keyed by iteration index.
    """
    pbar = trange(
        n_iterations, dynamic_ncols=True, desc="Running ACO", disable=disable_tqdm
    )
    for i in pbar:
        # reset environment
        td = td_initial.clone()
        self._one_step(td, env, decoding_kwargs)
        self.final_reward_cache[i] = self.final_reward.clone()  # type: ignore
        pbar.set_postfix({"reward": f"{self.final_reward.mean().item():.3f}"})  # type: ignore

    action_matrix = self._convert_final_action_to_matrix()
    return action_matrix, self.final_reward_cache
local_search

local_search(
    td: TensorDict,
    env: RL4COEnvBase,
    actions: Tensor,
    decoding_kwargs: dict,
) -> Tuple[Tensor, Tensor]

Perform local search on the actions and reward obtained.

Parameters:

  • td (TensorDict) –

    Current state of the problem.

  • env (RL4COEnvBase) –

    Environment representing the problem.

  • actions (Tensor) –

    Actions chosen by the algorithm.

Returns:

  • actions ( Tensor ) –

    The modified actions

  • reward ( Tensor ) –

    The modified reward

Source code in rl4co/models/zoo/deepaco/antsystem.py
def local_search(
    self, td: TensorDict, env: RL4COEnvBase, actions: Tensor, decoding_kwargs: dict
) -> Tuple[Tensor, Tensor]:
    """Perform local search on the actions and reward obtained.

    Args:
        td: Current state of the problem.
        env: Environment representing the problem.
        actions: Actions chosen by the algorithm.

    Returns:
        actions: The modified actions
        reward: The modified reward
    """
    device = td.device
    if env.name in ["tsp", "cvrp"]:
        # Convert to CPU in advance to minimize the overhead from device transfer
        td = td.detach().cpu()
        # TODO: avoid or generalize this, e.g., pre-compute for local search in each env
        td["distances"] = get_distance_matrix(td["locs"])
        actions = actions.detach().cpu()
    elif env.name in ["pctsp", "op"]:  # destroy & repair local search
        self.local_search_params.update(
            {
                "decoding_kwargs": decoding_kwargs,
                "heatmap": batchify(self.log_heuristic, self.n_ants),
            }
        )
    else:
        raise NotImplementedError(f"Local search not implemented for {env.name}")

    best_actions = env.local_search(td=td, actions=actions, **self.local_search_params)
    best_rewards = env.get_reward(td, best_actions)

    if self.use_nls:
        td_perturb = td.clone()
        td_perturb["distances"] = torch.tile(self.heuristic_dist, (self.n_ants, 1, 1))
        new_actions = best_actions.clone()

        for _ in range(self.n_perturbations):
            perturbed_actions = env.local_search(
                td=td_perturb, actions=new_actions, **self.perturbation_params
            )
            new_actions = env.local_search(
                td=td, actions=perturbed_actions, **self.local_search_params
            )
            new_rewards = env.get_reward(td, new_actions)

            improved_indices = new_rewards > best_rewards
            best_actions = env.replace_selected_actions(
                best_actions, new_actions, improved_indices
            )
            best_rewards[improved_indices] = new_rewards[improved_indices]

    best_actions = best_actions.to(device)
    best_rewards = best_rewards.to(device)

    return best_actions, best_rewards

Classes:

DeepACO

DeepACO(
    env: RL4COEnvBase,
    policy: Optional[DeepACOPolicy] = None,
    baseline: Union[REINFORCEBaseline, str] = "no",
    train_with_local_search: bool = True,
    ls_reward_aug_W: float = 0.95,
    policy_kwargs: dict = {},
    baseline_kwargs: dict = {},
    **kwargs
)

Bases: REINFORCE

Implements DeepACO: https://arxiv.org/abs/2309.14032.

Parameters:

  • env (RL4COEnvBase) –

    Environment to use for the algorithm

  • policy (Optional[DeepACOPolicy], default: None ) –

    Policy to use for the algorithm

  • baseline (Union[REINFORCEBaseline, str], default: 'no' ) –

    REINFORCE baseline. Defaults to "no" because the shared baseline is manually implemented.

  • train_with_local_search (bool, default: True ) –

    Whether to train with local search. Defaults to True.

  • ls_reward_aug_W (float, default: 0.95 ) –

    Coefficient to be used for the reward augmentation with the local search. Defaults to 0.95.

  • policy_kwargs (dict, default: {} ) –

    Keyword arguments for policy

  • baseline_kwargs (dict, default: {} ) –

    Keyword arguments for baseline

  • **kwargs

    Keyword arguments passed to the superclass

Methods:

Source code in rl4co/models/zoo/deepaco/model.py
def __init__(
    self,
    env: RL4COEnvBase,
    policy: Optional[DeepACOPolicy] = None,
    baseline: Union[REINFORCEBaseline, str] = "no",  # Shared baseline is manually implemented
    train_with_local_search: bool = True,
    ls_reward_aug_W: float = 0.95,
    policy_kwargs: dict = {},
    baseline_kwargs: dict = {},
    **kwargs,
):
    if policy is None:
        policy = DeepACOPolicy(
            env_name=env.name, train_with_local_search=train_with_local_search, **policy_kwargs
        )

    super().__init__(env, policy, baseline, baseline_kwargs, **kwargs)

    self.train_with_local_search = train_with_local_search
    self.ls_reward_aug_W = ls_reward_aug_W
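
Example (a hedged end-to-end sketch; the TSPEnv generator arguments and reset signature are assumptions that may differ across rl4co versions, and the import path follows the source location above):

from rl4co.envs import TSPEnv
from rl4co.models.zoo.deepaco.model import DeepACO

env = TSPEnv(generator_params={"num_loc": 20})  # assumed generator API
model = DeepACO(
    env,
    train_with_local_search=False,  # skip local search for a quick smoke test
    policy_kwargs={"n_ants": 10, "n_iterations": {"train": 1, "val": 2, "test": 5}},
)

td = env.reset(batch_size=[4])
out = model.policy(td, env, phase="test")  # runs the ACO loop at test time
print(out["reward"].mean())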

calculate_loss

calculate_loss(
    td: TensorDict,
    batch: TensorDict,
    policy_out: dict,
    reward: Optional[Tensor] = None,
    log_likelihood: Optional[Tensor] = None,
)

Calculate loss for REINFORCE algorithm.

Parameters:

  • td (TensorDict) –

    TensorDict containing the current state of the environment

  • batch (TensorDict) –

    Batch of data. This is used to get the extra loss terms, e.g., REINFORCE baseline

  • policy_out (dict) –

    Output of the policy network

  • reward (Optional[Tensor], default: None ) –

    Reward tensor. If None, it is taken from policy_out

  • log_likelihood (Optional[Tensor], default: None ) –

    Log-likelihood tensor. If None, it is taken from policy_out

Source code in rl4co/models/zoo/deepaco/model.py
def calculate_loss(
    self,
    td: TensorDict,
    batch: TensorDict,
    policy_out: dict,
    reward: Optional[torch.Tensor] = None,
    log_likelihood: Optional[torch.Tensor] = None,
):
    """Calculate loss for REINFORCE algorithm.

    Args:
        td: TensorDict containing the current state of the environment
        batch: Batch of data. This is used to get the extra loss terms, e.g., REINFORCE baseline
        policy_out: Output of the policy network
        reward: Reward tensor. If None, it is taken from `policy_out`
        log_likelihood: Log-likelihood tensor. If None, it is taken from `policy_out`
    """
    reward = policy_out["reward"]
    advantage = reward - reward.mean(dim=1, keepdim=True)  # Shared baseline

    if self.train_with_local_search:
        ls_reward = policy_out["ls_reward"]
        ls_advantage = ls_reward - ls_reward.mean(dim=1, keepdim=True)  # Shared baseline
        weighted_advantage = advantage * (1 - self.ls_reward_aug_W) + ls_advantage * self.ls_reward_aug_W
    else:
        weighted_advantage = advantage

    return -(weighted_advantage * policy_out["log_likelihood"]).mean()
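
Example (a self-contained sketch with dummy tensors, mirroring the shared baseline and reward augmentation above; it is not part of the library API):

import torch

reward = torch.randn(8, 20)                          # [batch, n_ants]
ls_reward = reward + 0.1 * torch.rand_like(reward)   # pretend local search improved rewards
log_likelihood = torch.randn(8, 20)
ls_reward_aug_W = 0.95

advantage = reward - reward.mean(dim=1, keepdim=True)           # shared baseline over ants
ls_advantage = ls_reward - ls_reward.mean(dim=1, keepdim=True)  # shared baseline over ants
weighted = advantage * (1 - ls_reward_aug_W) + ls_advantage * ls_reward_aug_W

loss = -(weighted * log_likelihood).mean()
print(loss.item())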

Classes:

DeepACOPolicy

DeepACOPolicy(
    encoder: Optional[NonAutoregressiveEncoder] = None,
    env_name: str = "tsp",
    temperature: float = 1.0,
    top_p: float = 0.0,
    top_k: int = 0,
    aco_class: Optional[Type[AntSystem]] = None,
    aco_kwargs: dict = {},
    train_with_local_search: bool = False,
    n_ants: Optional[Union[int, dict]] = None,
    n_iterations: Optional[Union[int, dict]] = None,
    start_node: Optional[int] = None,
    multistart: bool = False,
    k_sparse: Optional[int] = None,
    **encoder_kwargs
)

Bases: NonAutoregressivePolicy

Implements the DeepACO policy based on :class:NonAutoregressivePolicy. Introduced by Ye et al. (2023): https://arxiv.org/abs/2309.14032. This policy uses a Non-Autoregressive Graph Neural Network to generate heatmaps, which are then used to run Ant Colony Optimization (ACO) to construct solutions.

Parameters:

  • encoder (Optional[NonAutoregressiveEncoder], default: None ) –

    Encoder module. Can be passed by sub-classes

  • env_name (str, default: 'tsp' ) –

    Name of the environment used to initialize embeddings

  • temperature (float, default: 1.0 ) –

    Temperature for the softmax during decoding. Defaults to 1.0.

  • aco_class (Optional[Type[AntSystem]], default: None ) –

    Class representing the ACO algorithm to be used. Defaults to :class:AntSystem.

  • aco_kwargs (dict, default: {} ) –

    Additional arguments to be passed to the ACO algorithm.

  • train_with_local_search (bool, default: False ) –

    Whether to train with local search. Defaults to False.

  • n_ants (Optional[Union[int, dict]], default: None ) –

    Number of ants to be used in the ACO algorithm. Can be an integer or dictionary. Defaults to dict(train=30, val=48, test=48).

  • n_iterations (Optional[Union[int, dict]], default: None ) –

    Number of iterations to run the ACO algorithm. Can be an integer or dictionary. Defaults to dict(train=1, val=5, test=10).

  • encoder_kwargs

    Additional arguments to be passed to the encoder.

Methods:

  • forward

    Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions.

Source code in rl4co/models/zoo/deepaco/policy.py
def __init__(
    self,
    encoder: Optional[NonAutoregressiveEncoder] = None,
    env_name: str = "tsp",
    temperature: float = 1.0,
    top_p: float = 0.0,
    top_k: int = 0,
    aco_class: Optional[Type[AntSystem]] = None,
    aco_kwargs: dict = {},
    train_with_local_search: bool = False,
    n_ants: Optional[Union[int, dict]] = None,
    n_iterations: Optional[Union[int, dict]] = None,
    start_node: Optional[int] = None,
    multistart: bool = False,
    k_sparse: Optional[int] = None,
    **encoder_kwargs,
):
    if encoder is None:
        encoder_kwargs["k_sparse"] = k_sparse
        encoder = NARGNNEncoder(env_name=env_name, **encoder_kwargs)

    self.decode_type = "multistart_sampling" if env_name == "tsp" or multistart else "sampling"

    super().__init__(
        encoder=encoder,
        env_name=env_name,
        temperature=temperature,
        train_decode_type=self.decode_type,
        val_decode_type=self.decode_type,
        test_decode_type=self.decode_type,
    )

    self.default_decoding_kwargs = {}
    self.default_decoding_kwargs["select_best"] = False
    if k_sparse is not None:
        self.default_decoding_kwargs["top_k"] = k_sparse + (0 if env_name == "tsp" else 1)  # 1 for depot
    if "multistart" in self.decode_type:
        select_start_nodes_fn = partial(self.select_start_node_fn, start_node=start_node)
        self.default_decoding_kwargs.update(
            {"multistart": True, "select_start_nodes_fn": select_start_nodes_fn}
        )
    else:
        self.default_decoding_kwargs.update(
            {"multisample": True}
        )

    # For now, top_p and top_k are only used to filter logits (not passed to decoder)
    self.top_p = top_p
    self.top_k = top_k

    self.aco_class = AntSystem if aco_class is None else aco_class
    self.aco_kwargs = aco_kwargs
    self.train_with_local_search = train_with_local_search
    if train_with_local_search:
        assert self.aco_kwargs.get("use_local_search", False)
    self.n_ants = merge_with_defaults(n_ants, train=30, val=48, test=48)
    self.n_iterations = merge_with_defaults(n_iterations, train=1, val=5, test=10)
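
Example (a hedged construction sketch; values are illustrative and the import path follows the source location above):

from rl4co.models.zoo.deepaco.policy import DeepACOPolicy

policy = DeepACOPolicy(
    env_name="tsp",
    n_ants={"train": 20, "val": 32, "test": 64},      # per-phase override
    n_iterations={"train": 1, "val": 5, "test": 20},  # per-phase override
    k_sparse=10,                # keep only 10 edges per node (graph sparsification)
    aco_kwargs={"decay": 0.9},  # forwarded to the AntSystem constructor
)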

forward

forward(
    td_initial: TensorDict,
    env: Optional[Union[str, RL4COEnvBase]] = None,
    phase: str = "train",
    return_actions: bool = True,
    return_hidden: bool = True,
    actions=None,
    **decoding_kwargs
)

Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions. See :class:NonAutoregressivePolicy for more details during the training phase.

Source code in rl4co/models/zoo/deepaco/policy.py
def forward(
    self,
    td_initial: TensorDict,
    env: Optional[Union[str, RL4COEnvBase]] = None,
    phase: str = "train",
    return_actions: bool = True,
    return_hidden: bool = True,
    actions=None,
    **decoding_kwargs,
):
    """
    Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions.
    See :class:`NonAutoregressivePolicy` for more details during the training phase.
    """
    n_ants = self.n_ants[phase]

    decoding_kwargs.update(self.default_decoding_kwargs)
    decoding_kwargs.update(
        {"num_starts": n_ants} if "multistart" in self.decode_type else {"num_samples": n_ants}
    )

    # Instantiate environment if needed
    if (phase != "train" or self.train_with_local_search) and (env is None or isinstance(env, str)):
        env_name = self.env_name if env is None else env
        env = get_env(env_name)
    else:
        assert isinstance(env, RL4COEnvBase), "env must be an instance of RL4COEnvBase"

    if phase == "train":
        #  we just use the constructive policy
        outdict = super().forward(
            td_initial,
            env,
            phase=phase,
            calc_reward=True,
            return_actions=return_actions,
            return_hidden=return_hidden,
            actions=actions,
            **decoding_kwargs,
        )

        outdict["reward"] = unbatchify(outdict["reward"], n_ants)

        if self.train_with_local_search:
            heatmap = outdict["hidden"]
            # TODO: Refactor this so that we don't need to use the aco object
            aco = self.aco_class(heatmap, n_ants=n_ants, **self.aco_kwargs)
            _, ls_reward = aco.local_search(
                batchify(td_initial, n_ants), env, outdict["actions"], decoding_kwargs
            )
            outdict["ls_reward"] = unbatchify(ls_reward, n_ants)

        outdict["log_likelihood"] = unbatchify(outdict["log_likelihood"], n_ants)
        return outdict

    heatmap, _ = self.encoder(td_initial)
    heatmap /= self.temperature

    if self.top_k > 0:
        self.top_k = min(self.top_k, heatmap.size(-1))  # safety check
        heatmap = modify_logits_for_top_k_filtering(heatmap, self.top_k)

    if self.top_p > 0:
        assert self.top_p <= 1.0, "top-p should be in (0, 1]."
        heatmap = modify_logits_for_top_p_filtering(heatmap, self.top_p)

    aco = self.aco_class(heatmap, n_ants=n_ants, **self.aco_kwargs)
    actions, iter_rewards = aco.run(td_initial, env, self.n_iterations[phase], decoding_kwargs)

    out = {"reward": iter_rewards[self.n_iterations[phase] - 1]}
    out.update({f"reward_{i:03d}": iter_rewards[i] for i in range(self.n_iterations[phase])})
    if return_actions:
        out["actions"] = actions

    return out
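
Example (a hedged sketch continuing the policy construction above; the TSPEnv arguments and reset signature are assumptions):

from rl4co.envs import TSPEnv

env = TSPEnv(generator_params={"num_loc": 20})  # assumed generator API
td = env.reset(batch_size=[4])

out = policy(td, env, phase="test", return_actions=True)
print(out["reward"].mean())      # reward after the last ACO iteration
print(out["reward_000"].mean())  # reward after the first ACO iteration
print(out["actions"].shape)      # final action matrix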

NAR-GNN

Classes:

  • NARGNNPolicy

    Base Non-autoregressive policy for NCO construction methods.

NARGNNPolicy

NARGNNPolicy(
    encoder: Optional[NonAutoregressiveEncoder] = None,
    decoder: Optional[NonAutoregressiveDecoder] = None,
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[Module] = None,
    edge_embedding: Optional[Module] = None,
    graph_network: Optional[Module] = None,
    heatmap_generator: Optional[Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    train_decode_type: str = "multistart_sampling",
    val_decode_type: str = "multistart_greedy",
    test_decode_type: str = "multistart_greedy",
    **constructive_policy_kw
)

Bases: NonAutoregressivePolicy

Base non-autoregressive policy for NCO construction methods. This creates an NxN heatmap (i.e., a heuristic) over the N nodes that models the probability of moving from one node to another.

The policy performs the following steps:
  1. Encode the environment's initial state into node embeddings
  2. Decode (non-autoregressively) to construct the solution to the NCO problem

Warning

The effectiveness of the non-autoregressive approach can vary significantly across different problem types and configurations. It may require careful tuning of the model architecture and decoding strategy to achieve competitive results.

Parameters:

  • encoder (Optional[NonAutoregressiveEncoder], default: None ) –

    Encoder module. Can be passed by sub-classes

  • decoder (Optional[NonAutoregressiveDecoder], default: None ) –

    Decoder module. Note that this module defaults to the non-autoregressive decoder

  • embed_dim (int, default: 64 ) –

    Dimension of the embeddings

  • env_name (str, default: 'tsp' ) –

    Name of the environment used to initialize embeddings

  • init_embedding (Optional[Module], default: None ) –

    Model to use for the initial embedding. If None, use the default embedding for the environment

  • edge_embedding (Optional[Module], default: None ) –

    Model to use for the edge embedding. If None, use the default embedding for the environment

  • graph_network (Optional[Module], default: None ) –

    Model to use for the graph network. If None, use the default embedding for the environment

  • heatmap_generator (Optional[Module], default: None ) –

    Model to use for the heatmap generator. If None, use the default embedding for the environment

  • num_layers_heatmap_generator (int, default: 5 ) –

    Number of layers in the heatmap generator

  • num_layers_graph_encoder (int, default: 15 ) –

    Number of layers in the graph encoder

  • act_fn

    Activation function to use in the encoder

  • agg_fn

    Aggregation function to use in the encoder

  • linear_bias (bool, default: True ) –

    Whether to use bias in the encoder

  • train_decode_type (str, default: 'multistart_sampling' ) –

    Type of decoding during training

  • val_decode_type (str, default: 'multistart_greedy' ) –

    Type of decoding during validation

  • test_decode_type (str, default: 'multistart_greedy' ) –

    Type of decoding during testing

  • **constructive_policy_kw

    Unused keyword arguments

Source code in rl4co/models/zoo/nargnn/policy.py
def __init__(
    self,
    encoder: Optional[NonAutoregressiveEncoder] = None,
    decoder: Optional[NonAutoregressiveDecoder] = None,
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[nn.Module] = None,
    edge_embedding: Optional[nn.Module] = None,
    graph_network: Optional[nn.Module] = None,
    heatmap_generator: Optional[nn.Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    train_decode_type: str = "multistart_sampling",
    val_decode_type: str = "multistart_greedy",
    test_decode_type: str = "multistart_greedy",
    **constructive_policy_kw,
):
    if len(constructive_policy_kw) > 0:
        log.warn(f"Unused kwargs: {constructive_policy_kw}")

    if encoder is None:
        encoder = NARGNNEncoder(
            embed_dim=embed_dim,
            env_name=env_name,
            init_embedding=init_embedding,
            edge_embedding=edge_embedding,
            graph_network=graph_network,
            heatmap_generator=heatmap_generator,
            num_layers_heatmap_generator=num_layers_heatmap_generator,
            num_layers_graph_encoder=num_layers_graph_encoder,
            act_fn=act_fn,
            agg_fn=agg_fn,
            linear_bias=linear_bias,
        )

    # The decoder generates logits given the current td and heatmap
    if decoder is None:
        decoder = NonAutoregressiveDecoder()
    else:
        # check if the decoder has trainable parameters
        if any(p.requires_grad for p in decoder.parameters()):
            log.error(
                "The decoder contains trainable parameters. This should not happen in a non-autoregressive policy."
            )

    # Pass to constructive policy
    super(NARGNNPolicy, self).__init__(
        encoder=encoder,
        decoder=decoder,
        env_name=env_name,
        train_decode_type=train_decode_type,
        val_decode_type=val_decode_type,
        test_decode_type=test_decode_type,
        **constructive_policy_kw,
    )
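
Example (a hedged construction sketch; values are illustrative and the import path follows the source location above):

from rl4co.models.zoo.nargnn.policy import NARGNNPolicy

policy = NARGNNPolicy(
    embed_dim=64,
    env_name="tsp",
    num_layers_graph_encoder=12,     # depth of the GNN encoder
    num_layers_heatmap_generator=3,  # depth of the heatmap MLP
)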

Classes:

  • EdgeHeatmapGenerator

    MLP for converting edge embeddings to heatmaps.

  • NARGNNEncoder

    Anisotropic Graph Neural Network encoder with edge-gating mechanism as in Joshi et al. (2022), and used in DeepACO (Ye et al., 2023).

  • NARGNNNodeEncoder

    In this case, we just use the node embeddings from the graph without transforming them into a heatmap.

EdgeHeatmapGenerator

EdgeHeatmapGenerator(
    embed_dim: int,
    num_layers: int,
    act_fn: str | Callable = "silu",
    linear_bias: bool = True,
    undirected_graph: bool = True,
)

Bases: Module

MLP for converting edge embeddings to heatmaps.

Parameters:

  • embed_dim (int) –

    Dimension of the embeddings

  • num_layers (int) –

    The number of linear layers in the network.

  • act_fn (str | Callable, default: 'silu' ) –

    Activation function. Defaults to "silu".

  • linear_bias (bool, default: True ) –

    Use bias in linear layers. Defaults to True.

  • undirected_graph (bool, default: True ) –

    Whether the graph is undirected. Defaults to True.

Source code in rl4co/models/zoo/nargnn/encoder.py
def __init__(
    self,
    embed_dim: int,
    num_layers: int,
    act_fn: str | Callable = "silu",
    linear_bias: bool = True,
    undirected_graph: bool = True,
) -> None:
    super(EdgeHeatmapGenerator, self).__init__()

    self.linears = nn.ModuleList(
        [
            nn.Linear(embed_dim, embed_dim, bias=linear_bias)
            for _ in range(num_layers - 1)
        ]
    )
    self.output = nn.Linear(embed_dim, 1, bias=linear_bias)

    self.act = getattr(nn.functional, act_fn) if isinstance(act_fn, str) else act_fn

    self.undirected_graph = undirected_graph

NARGNNEncoder

NARGNNEncoder(
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[Module] = None,
    edge_embedding: Optional[Module] = None,
    graph_network: Optional[Module] = None,
    heatmap_generator: Optional[Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
)

Bases: NonAutoregressiveEncoder

Anisotropic Graph Neural Network encoder with the edge-gating mechanism of Joshi et al. (2022), as used in DeepACO (Ye et al., 2023). This creates an NxN heatmap (i.e., a heuristic) over the N nodes that models the probability of moving from one node to another. The model uses a multi-layer perceptron (MLP) to predict edge attributes directly from the input graph features; these are then transformed into a heatmap representation that facilitates decoding of the solution. The decoding process is managed by a specified strategy, which can range from simple greedy selection to more complex sampling methods.

Tip

This decoder's performance heavily relies on the ability of the MLP to capture the dependencies between different parts of the solution without the iterative refinement provided by autoregressive models. It is particularly useful in scenarios where the solution space can be effectively explored in a parallelized manner or when the solution components are largely independent.

Parameters:

  • embed_dim (int, default: 64 ) –

    Dimension of the node embeddings

  • env_name (str, default: 'tsp' ) –

    Name of the environment used to initialize embeddings

  • num_layers

    Number of layers in the encoder

  • init_embedding (Optional[Module], default: None ) –

    Model to use for the initial embedding. If None, use the default embedding for the environment

  • edge_embedding (Optional[Module], default: None ) –

    Model to use for the edge embedding. If None, use the default embedding for the environment

  • graph_network (Optional[Module], default: None ) –

    Model to use for the graph network. If None, use the default network for the environment

  • heatmap_generator (Optional[Module], default: None ) –

    Model to use for the heatmap generator. If None, use the default network for the environment

  • num_layers_heatmap_generator (int, default: 5 ) –

    Number of layers in the heatmap generator

  • num_layers_graph_encoder (int, default: 15 ) –

    Number of layers in the graph encoder

  • act_fn

    The activation function to use in each GNNLayer, see https://pytorch.org/docs/stable/nn.functional.html#non-linear-activation-functions for available options. Defaults to 'silu'.

  • agg_fn

    The aggregation function to use in each GNNLayer for pooling features. Options: 'add', 'mean', 'max'. Defaults to 'mean'.

  • linear_bias (bool, default: True ) –

    Use bias in linear layers. Defaults to True.

  • k_sparse (Optional[int], default: None ) –

    Number of edges to keep for each node. Defaults to None.

Methods:

  • forward

    Forward pass of the encoder.

Source code in rl4co/models/zoo/nargnn/encoder.py
def __init__(
    self,
    embed_dim: int = 64,
    env_name: str = "tsp",
    # TODO: pass network
    init_embedding: Optional[nn.Module] = None,
    edge_embedding: Optional[nn.Module] = None,
    graph_network: Optional[nn.Module] = None,
    heatmap_generator: Optional[nn.Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
):
    super(NonAutoregressiveEncoder, self).__init__()
    self.env_name = env_name

    self.init_embedding = (
        env_init_embedding(self.env_name, {"embed_dim": embed_dim})
        if init_embedding is None
        else init_embedding
    )

    self.edge_embedding = (
        env_edge_embedding(
            self.env_name, {"embed_dim": embed_dim, "k_sparse": k_sparse}
        )
        if edge_embedding is None
        else edge_embedding
    )

    self.graph_network = (
        GNNEncoder(
            embed_dim=embed_dim,
            num_layers=num_layers_graph_encoder,
            act_fn=act_fn,
            agg_fn=agg_fn,
        )
        if graph_network is None
        else graph_network
    )

    self.heatmap_generator = (
        EdgeHeatmapGenerator(
            embed_dim=embed_dim,
            num_layers=num_layers_heatmap_generator,
            linear_bias=linear_bias,
        )
        if heatmap_generator is None
        else heatmap_generator
    )

forward

forward(td: TensorDict)

Forward pass of the encoder. Transform the input TensorDict into the latent representation.

Source code in rl4co/models/zoo/nargnn/encoder.py
def forward(self, td: TensorDict):
    """Forward pass of the encoder.
    Transform the input TensorDict into the latent representation.
    """
    # Transfer to embedding space
    node_embed = self.init_embedding(td)
    graph = self.edge_embedding(td, node_embed)

    # Process embedding into graph
    # TODO: standardize?
    graph.x, graph.edge_attr = self.graph_network(
        graph.x, graph.edge_index, graph.edge_attr
    )

    # Generate heatmap logits
    heatmap_logits = self.heatmap_generator(graph)

    # Return latent representation (i.e. heatmap logits) and initial embeddings
    return heatmap_logits, node_embed
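
Example (a hedged sketch; the TSPEnv arguments and reset signature are assumptions, the import path follows the source location above, and torch_geometric is required by the underlying GNNEncoder):

from rl4co.envs import TSPEnv
from rl4co.models.zoo.nargnn.encoder import NARGNNEncoder

env = TSPEnv(generator_params={"num_loc": 20})  # assumed generator API
td = env.reset(batch_size=[4])

encoder = NARGNNEncoder(embed_dim=64, env_name="tsp", k_sparse=10)
heatmap_logits, node_embed = encoder(td)
print(heatmap_logits.shape)  # heatmap logits per instance (expected [4, 20, 20])
print(node_embed.shape)      # initial node embeddings ([4, 20, 64])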

NARGNNNodeEncoder

NARGNNNodeEncoder(
    embed_dim: int = 64,
    env_name: str = "tsp",
    init_embedding: Optional[Module] = None,
    edge_embedding: Optional[Module] = None,
    graph_network: Optional[Module] = None,
    heatmap_generator: Optional[Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
)

Bases: NARGNNEncoder

In this case, we just use the node embeddings from the graph without transforming them into a heatmap.

Methods:

  • forward

    Forward pass of the encoder.

Source code in rl4co/models/zoo/nargnn/encoder.py
def __init__(
    self,
    embed_dim: int = 64,
    env_name: str = "tsp",
    # TODO: pass network
    init_embedding: Optional[nn.Module] = None,
    edge_embedding: Optional[nn.Module] = None,
    graph_network: Optional[nn.Module] = None,
    heatmap_generator: Optional[nn.Module] = None,
    num_layers_heatmap_generator: int = 5,
    num_layers_graph_encoder: int = 15,
    act_fn="silu",
    agg_fn="mean",
    linear_bias: bool = True,
    k_sparse: Optional[int] = None,
):
    super(NonAutoregressiveEncoder, self).__init__()
    self.env_name = env_name

    self.init_embedding = (
        env_init_embedding(self.env_name, {"embed_dim": embed_dim})
        if init_embedding is None
        else init_embedding
    )

    self.edge_embedding = (
        env_edge_embedding(
            self.env_name, {"embed_dim": embed_dim, "k_sparse": k_sparse}
        )
        if edge_embedding is None
        else edge_embedding
    )

    self.graph_network = (
        GNNEncoder(
            embed_dim=embed_dim,
            num_layers=num_layers_graph_encoder,
            act_fn=act_fn,
            agg_fn=agg_fn,
        )
        if graph_network is None
        else graph_network
    )

    self.heatmap_generator = (
        EdgeHeatmapGenerator(
            embed_dim=embed_dim,
            num_layers=num_layers_heatmap_generator,
            linear_bias=linear_bias,
        )
        if heatmap_generator is None
        else heatmap_generator
    )

forward

forward(td: TensorDict)

Forward pass of the encoder. Transform the input TensorDict into the latent representation.

Source code in rl4co/models/zoo/nargnn/encoder.py
def forward(self, td: TensorDict):
    # Transfer to embedding space
    node_embed = self.init_embedding(td)
    graph = self.edge_embedding(td, node_embed)

    # Process embedding into graph
    # TODO: standardize?
    graph.x, graph.edge_attr = self.graph_network(
        graph.x, graph.edge_index, graph.edge_attr
    )

    proc_embeds = graph.x
    batch_size = node_embed.shape[0]
    # reshape proc_embeds from [bs*n, h] to [bs, n, h]
    proc_embeds = proc_embeds.reshape(batch_size, -1, proc_embeds.shape[1])
    return proc_embeds, node_embed