Skip to content

Commit 53b0cad

Browse files
Merge pull request #2947 from AlexandreSinger/feature-appack
[APPack] Flat-Placement Informed Unrelated Clustering
2 parents e7f3a02 + 9c87044 commit 53b0cad

File tree

5 files changed

+338
-36
lines changed

5 files changed

+338
-36
lines changed

vpr/src/pack/appack_context.h

+30-5
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ struct t_appack_options {
5353
CENTROID, /**< The location of the cluster is the centroid of the molecules which have been packed into it. */
5454
SEED /**< The location of the cluster is the location of the first molecule packed into it. */
5555
};
56-
e_cl_loc_ty cluster_location_ty = e_cl_loc_ty::CENTROID;
56+
static constexpr e_cl_loc_ty cluster_location_ty = e_cl_loc_ty::CENTROID;
5757

5858
// =========== Candidate gain attenuation ============================== //
5959
// These terms are used to update the gain of a given candidate based on
@@ -67,11 +67,11 @@ struct t_appack_options {
6767
// Distance threshold which decides when to use quadratic decay or inverted
6868
// sqrt decay. If the distance is less than this threshold, quadratic decay
6969
// is used. Inverted sqrt is used otherwise.
70-
float dist_th = 5.0f;
70+
static constexpr float dist_th = 5.0f;
7171
// Horizontal offset to the inverted sqrt decay.
72-
float sqrt_offset = -1.1f;
72+
static constexpr float sqrt_offset = -1.1f;
7373
// Scaling factor for the quadratic decay term.
74-
float quad_fac = 0.1543f;
74+
static constexpr float quad_fac = 0.1543f;
7575

7676
// =========== Candidate selection distance ============================ //
7777
// When selecting candidates, what distance from the cluster will we
@@ -81,7 +81,32 @@ struct t_appack_options {
8181
// types of molecules / clusters. For example, CLBs vs DSPs
8282
float max_candidate_distance = std::numeric_limits<float>::max();
8383

84-
// TODO: Investigate adding flat placement info to unrelated clustering.
84+
// =========== Unrelated clustering ==================================== //
85+
// After searching for candidates by connectivity and timing, the user may
86+
// turn on unrelated clustering, which will allow molecules which are
87+
// unrelated to the cluster being created to be attempted to be packed in.
88+
// APPack uses flat placement information to decide which unrelated
89+
// molecules to try.
90+
91+
// APPack will search for unrelated molecules in the tile which contains
92+
// the flat location of the cluster. It will then look farther out, tile
93+
// by tile. This parameter is the maximum distance from the cluster's tile
94+
// that APPack will search. Setting this to 0 would only allow APPack to
95+
// search within the cluster's tile. Setting this to a higher number would
96+
// allow APPack to search farther away; but may bring in molecules which
97+
// do not "want" to be in the cluster.
98+
static constexpr float max_unrelated_tile_distance = 1.0f;
99+
100+
// Unrelated clustering occurs after all other candidate selection methods
101+
// have failed. This parameter sets how many times we will attempt unrelated
102+
// clustering between failures of unrelated clustering. If this is set to
103+
// 1, and unrelated clustering failed for a cluster, it will not be attempted
104+
// again for that cluster (note: if it succeeds, the number of attempts gets
105+
// reset).
106+
// NOTE: A similar option exists in the candidate selector class. This was
107+
// duplicated since it is very likely that APPack would need a
108+
// different value for this option than the non-APPack flow.
109+
static constexpr int max_unrelated_clustering_attempts = 2;
85110

86111
// TODO: Investigate adding flat placement info to seed selection.
87112
};

vpr/src/pack/greedy_candidate_selector.cpp

+206-31
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "greedy_candidate_selector.h"
99
#include <algorithm>
1010
#include <cmath>
11+
#include <queue>
1112
#include <vector>
1213
#include "appack_context.h"
1314
#include "flat_placement_types.h"
@@ -16,13 +17,12 @@
1617
#include "attraction_groups.h"
1718
#include "cluster_legalizer.h"
1819
#include "cluster_placement.h"
19-
#include "globals.h"
2020
#include "greedy_clusterer.h"
2121
#include "prepack.h"
2222
#include "timing_info.h"
23-
#include "vpr_context.h"
2423
#include "vpr_types.h"
2524
#include "vtr_assert.h"
25+
#include "vtr_ndmatrix.h"
2626
#include "vtr_vector.h"
2727

2828
/*
@@ -105,44 +105,102 @@ GreedyCandidateSelector::GreedyCandidateSelector(
105105
, timing_info_(timing_info)
106106
, appack_ctx_(appack_ctx)
107107
, rng_(0) {
108-
// Initialize the list of molecules to pack, the clustering data, and the
109-
// net info.
110108

111-
// Initialize unrelated clustering data.
109+
// Initialize unrelated clustering data if unrelated clustering is enabled.
112110
if (allow_unrelated_clustering_) {
113-
/* alloc and load list of molecules to pack */
114-
unrelated_clustering_data_.resize(max_molecule_stats.num_used_ext_inputs + 1);
111+
initialize_unrelated_clustering_data(max_molecule_stats);
112+
}
113+
114+
/* TODO: This is memory inefficient, fix if causes problems */
115+
/* Store stats on nets used by packed block, useful for determining transitively connected blocks
116+
* (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */
117+
clb_inter_blk_nets_.resize(atom_netlist.blocks().size());
118+
}
119+
120+
void GreedyCandidateSelector::initialize_unrelated_clustering_data(const t_molecule_stats& max_molecule_stats) {
121+
// Create a sorted list of molecules, sorted on decreasing molecule base
122+
// gain. (Highest gain).
123+
std::vector<PackMoleculeId> molecules_vector;
124+
molecules_vector.assign(prepacker_.molecules().begin(), prepacker_.molecules().end());
125+
std::stable_sort(molecules_vector.begin(),
126+
molecules_vector.end(),
127+
[&](PackMoleculeId a_id, PackMoleculeId b_id) {
128+
const t_pack_molecule& a = prepacker_.get_molecule(a_id);
129+
const t_pack_molecule& b = prepacker_.get_molecule(b_id);
130+
131+
return a.base_gain > b.base_gain;
132+
});
133+
134+
if (appack_ctx_.appack_options.use_appack) {
135+
/**
136+
* For APPack, we build a spatial data structure where for each 1x1 grid
137+
* position on the FPGA, we maintain lists of molecule candidates.
138+
* The lists are in order of number of used external pins by the molecule.
139+
* Within each list, the molecules are sorted by their base gain.
140+
*/
141+
// Get the max x, y, and layer from the flat placement.
142+
t_flat_pl_loc max_loc({0.0f, 0.0f, 0.0f});
143+
for (PackMoleculeId mol_id : molecules_vector) {
144+
t_flat_pl_loc mol_pos = get_molecule_pos(mol_id, prepacker_, appack_ctx_);
145+
max_loc.x = std::max(max_loc.x, mol_pos.x);
146+
max_loc.y = std::max(max_loc.y, mol_pos.y);
147+
max_loc.layer = std::max(max_loc.layer, mol_pos.layer);
148+
}
115149

116-
// Create a sorted list of molecules, sorted on decreasing molecule base
117-
// gain. (Highest gain).
118-
std::vector<PackMoleculeId> molecules_vector;
119-
molecules_vector.assign(prepacker.molecules().begin(), prepacker.molecules().end());
120-
std::stable_sort(molecules_vector.begin(),
121-
molecules_vector.end(),
122-
[&](PackMoleculeId a_id, PackMoleculeId b_id) {
123-
const t_pack_molecule& a = prepacker.get_molecule(a_id);
124-
const t_pack_molecule& b = prepacker.get_molecule(b_id);
150+
VTR_ASSERT_MSG(max_loc.layer == 0,
151+
"APPack unrelated clustering does not support 3D "
152+
"FPGAs yet");
153+
154+
// Initialize the data structure with empty arrays with enough space
155+
// for each molecule.
156+
size_t flat_grid_width = max_loc.x + 1;
157+
size_t flat_grid_height = max_loc.y + 1;
158+
appack_unrelated_clustering_data_ =
159+
vtr::NdMatrix<std::vector<std::vector<PackMoleculeId>>, 2>({flat_grid_width,
160+
flat_grid_height});
161+
for (size_t x = 0; x < flat_grid_width; x++) {
162+
for (size_t y = 0; y < flat_grid_height; y++) {
163+
// Resize to the maximum number of used external pins. This is
164+
// to ensure that every molecule below can be inserted into a
165+
// valid list based on their number of external pins.
166+
appack_unrelated_clustering_data_[x][y].resize(max_molecule_stats.num_used_ext_pins + 1);
167+
}
168+
}
169+
170+
// Fill the grid with molecule information.
171+
// Note: These molecules are sorted based on their base gain. They are
172+
// inserted in such a way that the highest gain molecules appear
173+
// first in the lists below.
174+
for (PackMoleculeId mol_id : molecules_vector) {
175+
t_flat_pl_loc mol_pos = get_molecule_pos(mol_id, prepacker_, appack_ctx_);
125176

126-
return a.base_gain > b.base_gain;
127-
});
177+
//Figure out how many external inputs are used by this molecule
178+
t_molecule_stats molecule_stats = prepacker_.calc_molecule_stats(mol_id, atom_netlist_);
179+
int ext_inps = molecule_stats.num_used_ext_inputs;
180+
181+
//Insert the molecule into the unclustered lists by number of external inputs
182+
auto& tile_uc_data = appack_unrelated_clustering_data_[mol_pos.x][mol_pos.y];
183+
tile_uc_data[ext_inps].push_back(mol_id);
184+
}
185+
} else {
186+
// When not performing APPack, allocate and load a similar data structure
187+
// without spatial information.
188+
189+
/* alloc and load list of molecules to pack */
190+
unrelated_clustering_data_.resize(max_molecule_stats.num_used_ext_inputs + 1);
128191

129192
// Push back the each molecule into the unrelated clustering data vector
130193
// for their external inputs. This creates individual sorted lists of
131194
// molecules for each number of used external inputs.
132195
for (PackMoleculeId mol_id : molecules_vector) {
133196
//Figure out how many external inputs are used by this molecule
134-
t_molecule_stats molecule_stats = prepacker.calc_molecule_stats(mol_id, atom_netlist);
197+
t_molecule_stats molecule_stats = prepacker_.calc_molecule_stats(mol_id, atom_netlist_);
135198
int ext_inps = molecule_stats.num_used_ext_inputs;
136199

137200
//Insert the molecule into the unclustered lists by number of external inputs
138201
unrelated_clustering_data_[ext_inps].push_back(mol_id);
139202
}
140203
}
141-
142-
/* TODO: This is memory inefficient, fix if causes problems */
143-
/* Store stats on nets used by packed block, useful for determining transitively connected blocks
144-
* (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */
145-
clb_inter_blk_nets_.resize(atom_netlist.blocks().size());
146204
}
147205

148206
GreedyCandidateSelector::~GreedyCandidateSelector() {
@@ -673,15 +731,23 @@ PackMoleculeId GreedyCandidateSelector::get_next_candidate_for_cluster(
673731
// If we are allowing unrelated clustering and no molecule has been found,
674732
// get unrelated candidate for cluster.
675733
if (allow_unrelated_clustering_ && best_molecule == PackMoleculeId::INVALID()) {
676-
if (num_unrelated_clustering_attempts_ < max_unrelated_clustering_attempts_) {
677-
best_molecule = get_unrelated_candidate_for_cluster(cluster_id,
678-
cluster_legalizer);
679-
num_unrelated_clustering_attempts_++;
680-
VTR_LOGV(best_molecule && log_verbosity_ > 2,
681-
"\tFound unrelated molecule to cluster\n");
734+
const t_appack_options& appack_options = appack_ctx_.appack_options;
735+
if (appack_options.use_appack) {
736+
if (num_unrelated_clustering_attempts_ < appack_options.max_unrelated_clustering_attempts) {
737+
best_molecule = get_unrelated_candidate_for_cluster_appack(cluster_gain_stats,
738+
cluster_id,
739+
cluster_legalizer);
740+
num_unrelated_clustering_attempts_++;
741+
}
682742
} else {
683-
num_unrelated_clustering_attempts_ = 0;
743+
if (num_unrelated_clustering_attempts_ < max_unrelated_clustering_attempts_) {
744+
best_molecule = get_unrelated_candidate_for_cluster(cluster_id,
745+
cluster_legalizer);
746+
num_unrelated_clustering_attempts_++;
747+
}
684748
}
749+
VTR_LOGV(best_molecule && log_verbosity_ > 2,
750+
"\tFound unrelated molecule to cluster\n");
685751
} else {
686752
VTR_LOGV(!best_molecule && log_verbosity_ > 2,
687753
"\tNo related molecule found and unrelated clustering disabled\n");
@@ -1154,6 +1220,115 @@ PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster(
11541220
return PackMoleculeId::INVALID();
11551221
}
11561222

1223+
/**
 * @brief Finds an unrelated molecule to try packing into the given cluster,
 *        using flat placement information (APPack).
 *
 * We want to find a close candidate with the highest number of available
 * inputs which could be packed into the given cluster. Tiles are searched in
 * a BFS manner: first the 1x1 grid location containing the cluster's flat
 * position, then its direct (non-diagonal) neighbors, and so on, up to a
 * Manhattan distance of appack_options.max_unrelated_tile_distance from the
 * cluster's flat position.
 *
 * Here, a molecule is compatible if:
 *  - It has not been clustered already
 *  - The number of inputs it has available is less than or equal to the
 *    number of inputs available in the cluster.
 *  - It has not tried to be packed in this cluster before.
 *  - It is compatible with the cluster.
 *
 *  @param cluster_gain_stats
 *      Gain stats of the cluster; its flat_cluster_position is the source of
 *      the search and its mol_failures are excluded from the result.
 *  @param cluster_id
 *      The cluster to find a candidate for.
 *  @param cluster_legalizer
 *      Used to query available cluster inputs, whether a molecule is already
 *      clustered, and molecule/cluster compatibility.
 *
 *  @return The first compatible molecule found, or PackMoleculeId::INVALID()
 *          if no compatible molecule exists within the search distance.
 */
PackMoleculeId GreedyCandidateSelector::get_unrelated_candidate_for_cluster_appack(
    ClusterGainStats& cluster_gain_stats,
    LegalizationClusterId cluster_id,
    const ClusterLegalizer& cluster_legalizer) {

    VTR_ASSERT_MSG(allow_unrelated_clustering_,
                   "Cannot get unrelated candidates when unrelated clustering "
                   "is disabled");

    VTR_ASSERT_MSG(appack_ctx_.appack_options.use_appack,
                   "APPack is disabled, cannot get unrelated clusters using "
                   "flat placement information");

    // The cluster will likely have more inputs available than a single molecule
    // would have available (clusters have more pins). Clamp the inputs available
    // to the max number of inputs a molecule could have.
    size_t inputs_avail = cluster_legalizer.get_num_cluster_inputs_available(cluster_id);
    VTR_ASSERT_SAFE(!appack_unrelated_clustering_data_.empty());
    size_t max_molecule_inputs_avail = appack_unrelated_clustering_data_[0][0].size() - 1;
    if (inputs_avail >= max_molecule_inputs_avail) {
        inputs_avail = max_molecule_inputs_avail;
    }

    // Maximum distance (in tiles) from the cluster that will be searched.
    // This is taken from the APPack options rather than being hard-coded so
    // that tuning the option actually changes the search radius.
    const float max_search_dist = appack_ctx_.appack_options.max_unrelated_tile_distance;

    // Create a queue of locations to search and a map of visited grid locations.
    const size_t grid_width = appack_unrelated_clustering_data_.dim_size(0);
    const size_t grid_height = appack_unrelated_clustering_data_.dim_size(1);
    std::queue<t_flat_pl_loc> search_queue;
    vtr::NdMatrix<bool, 2> visited({grid_width, grid_height}, false);
    // Push the position of the cluster to the queue.
    search_queue.push(cluster_gain_stats.flat_cluster_position);

    while (!search_queue.empty()) {
        // Pop a position to search from the queue. The location is copied so
        // that it stays valid independently of later queue operations.
        const t_flat_pl_loc node_loc = search_queue.front();
        search_queue.pop();
        VTR_ASSERT_SAFE(node_loc.layer == 0);

        // If this position is too far from the source, skip it.
        float dist = get_manhattan_distance(node_loc, cluster_gain_stats.flat_cluster_position);
        if (dist > max_search_dist)
            continue;

        // If this position has been visited, skip it.
        if (visited[node_loc.x][node_loc.y])
            continue;
        visited[node_loc.x][node_loc.y] = true;

        // Explore this position from highest number of inputs available to lowest.
        const auto& uc_data = appack_unrelated_clustering_data_[node_loc.x][node_loc.y];
        VTR_ASSERT_SAFE(inputs_avail < uc_data.size());
        for (int ext_inps = static_cast<int>(inputs_avail); ext_inps >= 0; ext_inps--) {
            // Get the molecule by the number of external inputs.
            for (PackMoleculeId mol_id : uc_data[ext_inps]) {
                // If this molecule has been clustered, skip it.
                if (cluster_legalizer.is_mol_clustered(mol_id))
                    continue;
                // If this molecule has tried to be packed before and failed
                // do not try it. This also means that this molecule may be
                // related to this cluster in some way.
                if (cluster_gain_stats.mol_failures.find(mol_id) != cluster_gain_stats.mol_failures.end())
                    continue;
                // If this molecule is not compatible with the current cluster
                // skip it.
                if (!cluster_legalizer.is_molecule_compatible(mol_id, cluster_id))
                    continue;
                // Return this molecule as the unrelated candidate.
                return mol_id;
            }
        }

        // Push the neighbors of the position to the queue.
        // Note: Here, we are using the manhattan distance, so we do not push
        //       the diagonals. We also want to try the direct neighbors first
        //       since they should be closer.
        // Note: The upper-bound checks first make sure the dimension is at
        //       least 2; otherwise the unsigned "size - 2" would wrap around
        //       and an out-of-grid neighbor would be pushed.
        if (node_loc.x >= 1.0f)
            search_queue.push({node_loc.x - 1, node_loc.y, node_loc.layer});
        if (grid_width >= 2 && node_loc.x <= static_cast<float>(grid_width - 2))
            search_queue.push({node_loc.x + 1, node_loc.y, node_loc.layer});
        if (node_loc.y >= 1.0f)
            search_queue.push({node_loc.x, node_loc.y - 1, node_loc.layer});
        if (grid_height >= 2 && node_loc.y <= static_cast<float>(grid_height - 2))
            search_queue.push({node_loc.x, node_loc.y + 1, node_loc.layer});
    }

    // No molecule could be found. Return an invalid ID.
    return PackMoleculeId::INVALID();
}
1331+
11571332
void GreedyCandidateSelector::update_candidate_selector_finalize_cluster(
11581333
ClusterGainStats& cluster_gain_stats,
11591334
LegalizationClusterId cluster_id) {

0 commit comments

Comments
 (0)