PandA-2024.02
cdfc_module_binding.cpp
Go to the documentation of this file.
1 /*
2  *
3  * _/_/_/ _/_/ _/ _/ _/_/_/ _/_/
4  * _/ _/ _/ _/ _/_/ _/ _/ _/ _/ _/
5  * _/_/_/ _/_/_/_/ _/ _/_/ _/ _/ _/_/_/_/
6  * _/ _/ _/ _/ _/ _/ _/ _/ _/
7  * _/ _/ _/ _/ _/ _/_/_/ _/ _/
8  *
9  * ***********************************************
10  * PandA Project
11  * URL: http://panda.dei.polimi.it
12  * Politecnico di Milano - DEIB
13  * System Architectures Group
14  * ***********************************************
15  * Copyright (C) 2004-2024 Politecnico di Milano
16  *
17  * This file is part of the PandA framework.
18  *
19  * The PandA framework is free software; you can redistribute it and/or modify
20  * it under the terms of the GNU General Public License as published by
21  * the Free Software Foundation; either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * This program is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU General Public License for more details.
28  *
29  * You should have received a copy of the GNU General Public License
30  * along with this program. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
43 #include "cdfc_module_binding.hpp"
44 
45 #include "Parameter.hpp"
47 #include "behavioral_helper.hpp"
49 #include "chaining_information.hpp"
50 #include "clique_covering.hpp"
51 #include "cpu_time.hpp"
52 #include "custom_map.hpp"
53 #include "custom_set.hpp"
54 #include "dbgPrintHelper.hpp"
55 #include "design_flow_manager.hpp"
56 #include "design_flow_step.hpp"
57 #include "fu_binding.hpp"
58 #include "function_behavior.hpp"
59 #include "graph.hpp"
60 #include "hash_helper.hpp"
61 #include "hls.hpp"
62 #include "hls_constraints.hpp"
63 #include "hls_device.hpp"
65 #include "hls_manager.hpp"
66 #include "liveness.hpp"
67 #include "memory.hpp"
68 #include "module_binding_check.hpp"
70 #include "op_graph.hpp"
72 #include "reg_binding.hpp"
73 #include "schedule.hpp"
77 #include "technology_manager.hpp"
78 #include "technology_node.hpp"
79 #include "tree_helper.hpp"
80 #include "tree_manager.hpp"
81 #include "tree_node.hpp"
82 #include "utility.hpp"
84 
85 #include <algorithm>
86 #include <boost/range/adaptor/reversed.hpp>
87 #include <cmath>
88 #include <deque>
89 #include <filesystem>
90 #include <iosfwd>
91 #include <limits>
92 #include <list>
93 #include <string>
94 #include <utility>
95 #include <vector>
96 
97 #ifdef HC_APPROACH
98 #include "hierarchical_clustering.hpp"
99 struct spec_hierarchical_clustering : public hierarchical_clustering<>
100 {
101 };
102 #endif
103 #include "string_manipulation.hpp" // for GET_CLASS
104 
105 #define DEFAULT_SMALL_NORMALIZED_RESOURCE_AREA 2.0
106 #define MODULE_BINDING_MUX_MARGIN 1.0
107 #define DSP_MARGIN 1.0
108 #define CLOCK_MARGIN 0.97
109 #define OP_THRESHOLD 1000
110 
111 template <typename OutputIterator>
112 struct topological_based_sorting_visitor : public boost::dfs_visitor<>
113 {
114  topological_based_sorting_visitor(std::vector<vertex>& _c2s, const OpGraphConstRef _sdg, OutputIterator _iter)
115  : m_iter(_iter), c2s(_c2s), sdg(_sdg)
116  {
117  }
118 
119  template <typename Vertex, typename Graph>
120  void finish_vertex(const Vertex& u, Graph& g)
121  {
122  *m_iter++ = boost::get(boost::vertex_index, g, u);
123  }
124  template <typename Edge, typename Graph>
125  void back_edge(const Edge& e, Graph& g)
126  {
127  std::cerr << GET_NAME(sdg, c2s[boost::get(boost::vertex_index, g, boost::source(e, g))]) << "("
128  << sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, g, boost::source(e, g))])->GetOperation()
129  << ")-" << GET_NAME(sdg, c2s[boost::get(boost::vertex_index, g, boost::target(e, g))]) << "("
130  << sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, g, boost::target(e, g))])->GetOperation()
131  << ")" << std::endl;
132  BOOST_THROW_EXCEPTION(boost::not_a_dag());
133  }
134 
135  OutputIterator m_iter;
136  std::vector<vertex>& c2s;
138 };
139 
140 template <typename VertexListGraph, typename OutputIterator, typename P, typename T, typename R>
141 void topological_based_sorting(const VertexListGraph& g, std::vector<vertex>& c2s, const OpGraphConstRef sdg,
142  OutputIterator result, const boost::bgl_named_params<P, T, R>& params)
143 {
145  boost::depth_first_search(g, params.visitor(TopoVisitor(c2s, sdg, result)));
146 }
147 
148 template <typename VertexListGraph, typename OutputIterator>
149 void topological_based_sorting(const VertexListGraph& g, std::vector<vertex>& c2s, const OpGraphConstRef sdg,
150  OutputIterator result)
151 {
152  topological_based_sorting(g, c2s, sdg, result, boost::bgl_named_params<int, boost::buffer_param_t>(0));
153 }
154 
159 {
160  private:
163 
164  public:
171  bool operator()(const unsigned int& a, const unsigned int& b) const
172  {
173  unsigned char wm_a = (allocation_information->is_direct_access_memory_unit(a) ||
174  allocation_information->is_indirect_access_memory_unit(a)) ?
175  1 :
176  0;
177  unsigned char wm_b = (allocation_information->is_direct_access_memory_unit(b) ||
178  allocation_information->is_indirect_access_memory_unit(b)) ?
179  1 :
180  0;
181  double wd_a = allocation_information->get_DSPs(a);
182  double wd_b = allocation_information->get_DSPs(b);
183  double wa_a = allocation_information->get_area(a);
184  double wa_b = allocation_information->get_area(b);
185  return ((wm_a > wm_b) || (wm_a == wm_b && wd_a > wd_b) || (wm_a == wm_b && wd_a == wd_b && wa_a > wa_b) ||
186  (wm_a == wm_b && wd_a == wd_b && wa_a == wa_b && a < b));
187  }
188 
194  : allocation_information(_allocation_soluton)
195  {
196  }
197 
201  ~cdfc_resource_ordering_functor() = default;
202 };
203 
205  const CliqueCovering_Algorithm _clique_covering_algorithm)
206  : clique_covering_algorithm(_clique_covering_algorithm)
207 {
208 }
209 
211 {
213 }
214 
216 {
217  return STR(static_cast<unsigned int>(clique_covering_algorithm));
218 }
219 
221  const OpVertexSet& all_candidate_vertices)
222 {
223  const tree_managerRef TreeM = HLSMgr->get_tree_manager();
224  const FunctionBehaviorConstRef FB = HLSMgr->CGetFunctionBehavior(funId);
226  for(auto current_v : all_candidate_vertices)
227  {
229  "-->Considering operation " + GET_NAME(data, current_v) + " " +
230  STR(TreeM->CGetTreeNode(data->CGetOpNodeInfo(current_v)->GetNodeId())));
231  std::vector<HLS_manager::io_binding_type> vars_read = HLSMgr->get_required_values(HLS->functionId, current_v);
232  size_t n_ports = vars_read.size();
233  con_rel[current_v].resize(n_ports);
234  for(unsigned int port_index = 0; port_index < n_ports; ++port_index)
235  {
236  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Considering port " + STR(port_index));
237  auto tree_var = std::get<0>(vars_read[port_index]);
238  if(tree_var != 0)
239  {
240  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---" + STR(TreeM->CGetTreeNode(tree_var)));
241  CustomOrderedSet<std::pair<conn_code, std::pair<unsigned int, vertex>>>& con_rel_per_vertex_per_port_index =
242  con_rel[current_v][port_index];
243  const CustomOrderedSet<vertex>& running_states = HLS->Rliv->get_state_where_run(current_v);
244  for(const auto state : running_states)
245  {
246  if(tree_helper::is_parameter(TreeM, tree_var) || !HLS->Rliv->has_op_where_defined(tree_var))
247  {
248  con_rel_per_vertex_per_port_index.insert(
249  std::make_pair(no_def, std::make_pair(tree_var, NULL_VERTEX)));
250  }
251  else
252  {
253  vertex def_op = HLS->Rliv->get_op_where_defined(tree_var);
255  "Variable is defined in " + GET_NAME(data, def_op));
256  const CustomOrderedSet<vertex>& def_op_ending_states = HLS->Rliv->get_state_where_end(def_op);
257  if((GET_TYPE(data, def_op) & TYPE_PHI) == 0)
258  {
259  if(def_op_ending_states.find(state) != def_op_ending_states.end())
260  {
261  con_rel_per_vertex_per_port_index.insert(
262  std::make_pair(no_phi_chained, std::make_pair(tree_var, def_op)));
263  }
264  else if(HLS->storage_value_information->is_a_storage_value(state, tree_var))
265  {
266  auto storage_value = HLS->storage_value_information->get_storage_value_index(state, tree_var);
267  con_rel_per_vertex_per_port_index.insert(
268  std::make_pair(no_phi_no_chained, std::make_pair(storage_value, def_op)));
269  }
270  else
271  {
272  THROW_UNREACHABLE("unexpected");
273  }
274  }
275  else
276  {
277  auto storage_value = HLS->storage_value_information->get_storage_value_index(state, tree_var);
278  THROW_ASSERT(HLS->storage_value_information->is_a_storage_value(state, tree_var),
279  "unxpected case");
280  con_rel_per_vertex_per_port_index.insert(
281  std::make_pair(phi, std::make_pair(storage_value, def_op)));
282  }
283  }
284  }
285  }
286  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--Considered port " + STR(port_index));
287  }
288  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--Considered operation " + GET_NAME(data, current_v));
289  }
290 }
291 
292 template <bool do_estimation, bool do_conversion, typename vertex_type, class cluster_type>
293 void estimate_muxes(const connection_relation& con_rel, unsigned long long mux_prec, double& tot_mux_delay,
294  double& tot_mux_area, const cluster_type& cluster, unsigned int& total_muxes,
295  unsigned int& n_shared, const CustomUnorderedMap<vertex_type, vertex>& converter,
296  const HLS_managerRef HLSMgr, const hlsRef HLS,
297  int
298 #ifndef NDEBUG
299  debug_level
300 #endif
301 )
302 {
303  const fu_bindingRef fu = HLS->Rfu;
304  const FunctionBehaviorConstRef FB = HLSMgr->CGetFunctionBehavior(HLS->functionId);
306  bool has_register_done = HLS->Rreg && HLS->Rreg->size() != 0;
307  std::vector<CustomUnorderedSet<unsigned int>> regs_in;
308  std::vector<CustomUnorderedSet<unsigned int>> chained_in;
309  std::vector<CustomUnorderedSet<std::pair<unsigned int, unsigned int>>> module_in;
310  std::vector<CustomUnorderedSet<std::pair<unsigned int, unsigned int>>> module_in_reg;
312 #ifndef NDEBUG
314  CustomUnorderedSet<vertex> chained_out;
315  unsigned int n_tot_outgoing_edges = 0, n_tot_outgoing_unbound_operations = 0;
316 #endif
317  unsigned int n_tot_shared = 0;
318  unsigned int max_port_index = 0;
319  for(auto cv : cluster)
320  {
321  vertex current_v;
322  if(std::is_same<vertex, vertex_type>::value && !do_conversion)
323  {
324  current_v = reinterpret_cast<vertex>(cv);
325  }
326  else
327  {
328  current_v = converter.find(cv)->second;
329  }
330  max_port_index = std::max(max_port_index, static_cast<unsigned int>(con_rel.find(current_v)->second.size()));
331  }
332  regs_in.resize(max_port_index);
333  chained_in.resize(max_port_index);
334  module_in.resize(max_port_index);
335  module_in_reg.resize(max_port_index);
336 
337  for(auto cv : cluster)
338  {
339  vertex current_v;
340  if(std::is_same<vertex, vertex_type>::value && !do_conversion)
341  {
342  current_v = reinterpret_cast<vertex>(cv);
343  }
344  else
345  {
346  current_v = converter.find(cv)->second;
347  }
348  THROW_ASSERT(con_rel.find(current_v) != con_rel.end(), "missing vertex from con_rel data structure");
349  for(unsigned int port_index_actual = 0; port_index_actual < con_rel.find(current_v)->second.size();
350  ++port_index_actual)
351  {
352  auto port_index = port_index_actual;
354  con_rel_per_vertex_per_port_index = con_rel.find(current_v)->second[port_index];
355  if(fu->get_ports_are_swapped(current_v))
356  {
357  if(port_index_actual == 0)
358  {
359  port_index = 1;
360  }
361  else
362  {
363  port_index = 0;
364  }
365  }
366  for(auto triple : con_rel_per_vertex_per_port_index)
367  {
368  switch(triple.first)
369  {
370  case no_def:
371  {
372  auto tree_var = triple.second.first;
373  chained_in[port_index].insert(
374  tree_var);
375  break;
376  }
377  case no_phi_chained:
378  {
379  auto tree_var = triple.second.first;
380  vertex def_op = triple.second.second;
381  if(fu->get_index(def_op) != INFINITE_UINT)
382  {
383  module_in[port_index].insert(std::make_pair(fu->get_assign(def_op), fu->get_index(def_op)));
384  }
385  else
386  {
387  chained_in[port_index].insert(tree_var);
388  }
389  break;
390  }
391  case no_phi_no_chained:
392  {
393  auto storage_value = triple.second.first;
394  vertex def_op = triple.second.second;
395  if(has_register_done)
396  {
397  regs_in[port_index].insert(HLS->Rreg->get_register(storage_value));
398  }
399  else if(fu->get_index(def_op) != INFINITE_UINT)
400  {
401  module_in_reg[port_index].insert(std::make_pair(fu->get_assign(def_op), fu->get_index(def_op)));
402  }
403  else
404  {
405  regs_in[port_index].insert(storage_value);
406  }
407  break;
408  }
409  case phi:
410  {
411  auto storage_value = triple.second.first;
412  vertex def_op = triple.second.second;
413  if(has_register_done)
414  {
415  regs_in[port_index].insert(HLS->Rreg->get_register(storage_value));
416  }
417  else if(fu->get_index(def_op) != INFINITE_UINT)
418  {
419  module_in_reg[port_index].insert(std::make_pair(fu->get_assign(def_op), fu->get_index(def_op)));
420  }
421  else
422  {
423  regs_in[port_index].insert(storage_value);
424  }
425  break;
426  }
427  default:
428  {
429  THROW_ERROR("unexpected connection code");
430  }
431  }
432  }
433  }
434 
435 #ifndef NDEBUG
436  if(has_register_done)
437  {
438  auto var_written = HLSMgr->get_produced_value(HLS->functionId, current_v);
439  if(var_written)
440  {
441  const CustomOrderedSet<vertex>& end = HLS->Rliv->get_state_where_end(current_v);
442  for(const auto estate : end)
443  {
444  if(HLS->storage_value_information->is_a_storage_value(estate, var_written))
445  {
446  regs_out.insert(HLS->Rreg->get_register(
447  HLS->storage_value_information->get_storage_value_index(estate, var_written)));
448  }
449  else
450  {
451  chained_out.insert(estate);
452  }
453  }
454  }
455  }
456 #endif
457  for(const auto& oe : data->CGetOutEdges(current_v))
458  {
459  if((data->GetSelector(oe) & (DFG_SCA_SELECTOR | FB_DFG_SCA_SELECTOR)))
460  {
461 #ifndef NDEBUG
462  ++n_tot_outgoing_edges;
463 #endif
464  vertex tgt = boost::target(oe, *data);
465  if(fu->get_index(tgt) != INFINITE_UINT)
466  {
467  if(module_out.find(std::make_pair(fu->get_assign(tgt), fu->get_index(tgt))) != module_out.end())
468  {
469  ++n_tot_shared;
470  }
471  else
472  {
473  module_out.insert(std::make_pair(fu->get_assign(tgt), fu->get_index(tgt)));
474  }
475  }
476  else
477  {
478 #ifndef NDEBUG
479  ++n_tot_outgoing_unbound_operations;
480 #endif
481  }
482  }
483  }
484  }
485 
487  total_muxes = 0;
488  tot_mux_area = 0;
489  tot_mux_delay = 0;
490  for(unsigned int port_index = 0; port_index < max_port_index; ++port_index)
491  {
492  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---Ports " + STR(port_index));
493  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---Reg in ports: " + STR(regs_in[port_index].size()));
495  "---Chained in ports: " + STR(chained_in[port_index].size()));
497  "---Module in ports: " + STR(module_in[port_index].size()));
499  "---Module in reg ports: " + STR(module_in_reg[port_index].size()));
500  unsigned int current_muxs = 0;
501  current_muxs += static_cast<unsigned int>(regs_in[port_index].size());
502  current_muxs += static_cast<unsigned int>(chained_in[port_index].size());
503  current_muxs += static_cast<unsigned int>(module_in[port_index].size());
504  current_muxs += static_cast<unsigned int>(module_in_reg[port_index].size());
505  if(current_muxs > 1)
506  {
507  total_muxes += current_muxs - 1;
508  }
509  if(do_estimation)
510  {
511  tot_mux_area += HLS->allocation_information->estimate_muxNto1_area(mux_prec, current_muxs);
512  tot_mux_delay =
513  std::max(tot_mux_delay, HLS->allocation_information->estimate_muxNto1_delay(mux_prec, current_muxs));
514  }
515  }
517  "---total number of outgoing edges: " + STR(n_tot_outgoing_edges));
519  "---total number of outgoing unbound operations: " + STR(n_tot_outgoing_unbound_operations));
521  "---total number of outgoing bound modules: " + STR(module_out.size()));
522  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---total number of modules shared: " + STR(n_tot_shared));
523  if(has_register_done)
524  {
525  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---Reg out ports: " + STR(regs_out.size()));
526  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---Chained out ports: " + STR(chained_out.size()));
527  }
528  n_shared = n_tot_shared;
529 }
530 
531 struct slack_based_filtering : public filter_clique<vertex>
532 {
534  const CustomUnorderedMap<vertex, double>& _starting_time, double _controller_delay,
535  unsigned long long _mux_prec, const hlsRef _HLS, const HLS_managerRef _HLSMgr,
536  const double _area_resource, const connection_relation& _con_rel)
537  : slack_time(_slack_time),
538  starting_time(_starting_time),
539  controller_delay(_controller_delay),
540  mux_prec(_mux_prec),
541  HLS(_HLS),
542  HLSMgr(_HLSMgr),
543  data(_HLSMgr->CGetFunctionBehavior(_HLS->functionId)->CGetOpGraph(FunctionBehavior::FDFG)),
544  area_resource(_area_resource),
545  con_rel(_con_rel)
546  {
547  }
548 
550  const CustomUnorderedMap<C_vertex, vertex>& converter,
551  const cc_compatibility_graph& cg) const override
552  {
553  THROW_ASSERT(!candidate_clique.empty(), "candidate clique cannot be empty");
554  double min_slack = std::numeric_limits<double>::max();
555  C_vertex min_slack_vertex = *candidate_clique.begin();
556  vertex current_v;
557  unsigned int total_muxes;
558  unsigned int n_shared;
559  double mux_area_estimation, mux_time_estimation;
560  estimate_muxes<true, true, C_vertex>(con_rel, mux_prec, mux_time_estimation, mux_area_estimation,
561  candidate_clique, total_muxes, n_shared, converter, HLSMgr, HLS,
562  HLS->debug_level);
563 
564  double max_starting_time = 0.0;
565  for(auto current_c : candidate_clique)
566  {
567  current_v = converter.find(current_c)->second;
568  max_starting_time = std::max(max_starting_time, starting_time.find(current_v)->second);
569  }
570  if(max_starting_time < controller_delay && total_muxes > 0)
571  {
572  max_starting_time = controller_delay;
573  }
574  auto vert_it_end = candidate_clique.end();
575  for(auto vert_it = candidate_clique.begin(); vert_it != vert_it_end; ++vert_it)
576  {
577  THROW_ASSERT(converter.find(*vert_it) != converter.end(), "non-existing vertex");
578  current_v = converter.find(*vert_it)->second;
579  double curr_slack =
580  slack_time.find(current_v)->second - (max_starting_time - starting_time.find(current_v)->second);
581 
582  // THROW_ASSERT(curr_slack>=0.0, "negative slack not allowed");
583  // std::cerr << "Current slack " << curr_slack << " for vertex " << GET_NAME(data, current_v) << std::endl;
584  if(curr_slack < min_slack)
585  {
586  min_slack_vertex = *vert_it;
587  min_slack = curr_slack;
588  }
589  else if(curr_slack == min_slack && min_slack_vertex != *vert_it)
590  {
591  int weight1 = 0;
592  boost::graph_traits<cc_compatibility_graph>::out_edge_iterator ei, ei_end;
593  boost::tie(ei, ei_end) = boost::out_edges(*vert_it, cg);
594  for(; ei != ei_end; ++ei)
595  {
596  if(candidate_clique.find(boost::target(*ei, cg)) != candidate_clique.end())
597  {
598  weight1 += cg[*ei].weight;
599  }
600  }
601  int weight2 = 0;
602  boost::tie(ei, ei_end) = boost::out_edges(min_slack_vertex, cg);
603  for(; ei != ei_end; ++ei)
604  {
605  if(candidate_clique.find(boost::target(*ei, cg)) != candidate_clique.end())
606  {
607  weight2 += cg[*ei].weight;
608  }
609  }
610 
611  if(weight1 < weight2)
612  {
613  min_slack_vertex = *vert_it;
614  }
615  }
616  }
617  // std::cerr << "Min_slack " << min_slack << " mux_delay " << mux_delay << std::endl;
619  if(total_muxes > 0 && min_slack < 0)
620  {
621  v = min_slack_vertex;
622  // std::cerr << "Removed0 " << GET_NAME(data, converter.find(v)->second) << " Slack "<< min_slack << "
623  // mux_delay " << mux_delay << std::endl;
624  return true;
625  }
627 
628  if(total_muxes > 0 &&
629  ((mux_area_estimation + area_resource) >=
630  (static_cast<double>(candidate_clique.size() + (total_muxes <= n_shared ? n_shared : 0)) * area_resource)))
631  {
632  v = min_slack_vertex;
633  // std::cerr << "Removed1 " << GET_NAME(data, converter.find(v)->second) << " Slack "<< min_slack << "
634  // mux_delay " << mux_delay << std::endl;
635  return true;
636  }
637 
638  if(total_muxes > 0 && mux_time_estimation > min_slack)
639  {
640  v = min_slack_vertex;
641  // std::cerr << "Removed " << GET_NAME(data, converter.find(v)->second) << " Slack "<< min_slack << " levels "
642  // << levels_in << " mux_delay " << mux_delay << std::endl;
643  return true;
644  }
645  else
646  {
647  // std::cerr << "No vertex removed " << " Slack "<< min_slack << " levels " << levels_in << " mux_delay " <<
648  // mux_delay << std::endl;
649  return false;
650  }
651  }
652 
653  size_t clique_cost(const CustomOrderedSet<C_vertex>& candidate_clique,
654  const CustomUnorderedMap<C_vertex, vertex>& converter) const override
655  {
656  unsigned int total_muxes;
657  unsigned int n_shared;
658  double mux_area_estimation, mux_time_estimation;
659  estimate_muxes<false, true, C_vertex>(con_rel, mux_prec, mux_time_estimation, mux_area_estimation,
660  candidate_clique, total_muxes, n_shared, converter, HLSMgr, HLS,
661  HLS->debug_level);
662  return static_cast<size_t>(total_muxes);
663  }
664 
665  bool is_filtering() const override
666  {
667  return true;
668  }
669 
670  private:
674  const unsigned long long mux_prec;
675  const hlsRef HLS;
678  const double area_resource;
680 };
681 
682 CdfcEdgeInfo::CdfcEdgeInfo(const int _edge_weight) : edge_weight(_edge_weight)
683 {
684 }
685 
686 class CdfcWriter : public VertexWriter
687 {
688  private:
691 
693 
696 
697  public:
702  explicit CdfcWriter(const CdfcGraph* _printing_graph)
703  : VertexWriter(_printing_graph, 0),
704  cdfc_graph_info(GetPointer<const CdfcGraphInfo>(_printing_graph->CGetGraphInfo())),
705  c2s(cdfc_graph_info->c2s),
706  operation_writer(cdfc_graph_info->operation_graph.get(), 0)
707  {
708  }
709 
715  void operator()(std::ostream& out, const vertex& v) const override
716  {
717  operation_writer(out, c2s.find(v)->second);
718  }
719 };
720 
722 {
723  public:
729  explicit CdfcEdgeWriter(const CdfcGraph* _printing_graph) : EdgeWriter(_printing_graph, 0)
730  {
731  }
732 
738  void operator()(std::ostream& out, const EdgeDescriptor& e) const override
739  {
740  const auto* edge_info = Cget_edge_info<CdfcEdgeInfo>(e, *printing_graph);
741  if(edge_info)
742  {
743  out << "[label=\"" << edge_info->edge_weight << "\"]";
744  }
745  else
746  {
747  out << "[color=red3]";
748  }
749  }
750 };
751 
753  : c2s(_c2s), operation_graph(_operation_graph)
754 {
755 }
756 
758  : graphs_collection(RefcountCast<GraphInfo>(cdfc_graph_info), _parameters)
759 {
760 }
761 
763 
764 CdfcGraph::CdfcGraph(const CdfcGraphsCollectionRef cdfc_graphs_collection, const int _selector)
765  : graph(cdfc_graphs_collection.get(), _selector)
766 {
767 }
768 
769 CdfcGraph::CdfcGraph(const CdfcGraphsCollectionRef cdfc_graphs_collection, const int _selector,
770  const CustomUnorderedSet<vertex>& vertices)
771  : graph(cdfc_graphs_collection.get(), _selector, vertices)
772 {
773 }
774 
775 CdfcGraph::~CdfcGraph() = default;
776 
777 void CdfcGraph::WriteDot(const std::string& file_name, const int) const
778 {
779  const auto* cdfc_graph_info = GetPointer<const CdfcGraphInfo>(CGetGraphInfo());
780  const BehavioralHelperConstRef behavioral_helper = cdfc_graph_info->operation_graph->CGetOpGraphInfo()->BH;
781  const std::string output_directory = collection->parameters->getOption<std::string>(OPT_dot_directory) + "/" +
782  behavioral_helper->get_function_name() + "/";
783  if(!std::filesystem::exists(output_directory))
784  {
785  std::filesystem::create_directories(output_directory);
786  }
787  const std::string full_name = output_directory + file_name;
788  const VertexWriterConstRef cdfc_writer(new CdfcWriter(this));
789  const EdgeWriterConstRef cdfc_edge_writer(new CdfcEdgeWriter(this));
790  InternalWriteDot<const CdfcWriter, const CdfcEdgeWriter>(full_name, cdfc_writer, cdfc_edge_writer);
791 }
792 
794  unsigned int _funId, const DesignFlowManagerConstRef _design_flow_manager,
795  const HLSFlowStepSpecializationConstRef _hls_flow_step_specialization)
796  : fu_binding_creator(_parameters, _HLSMgr, _funId, _design_flow_manager, HLSFlowStep_Type::CDFC_MODULE_BINDING,
797  _hls_flow_step_specialization ?
798  _hls_flow_step_specialization :
800  _parameters->getOption<CliqueCovering_Algorithm>(OPT_cdfc_module_binding_algorithm)))),
801  small_normalized_resource_area(_parameters->IsParameter("small_normalized_resource_area") ?
802  _parameters->GetParameter<double>("small_normalized_resource_area") :
804 {
805  debug_level = _parameters->get_class_debug_level(GET_CLASS(*this));
806 }
807 
809 
812  CustomUnorderedMap<vertex, double>& starting_time,
813  bool update_starting_time, bool only_backward, bool only_forward)
814 {
815  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Updating slack starting time");
816  while(!sorted_vertices.empty())
817  {
818  vertex curr_vertex = *sorted_vertices.begin();
819  INDENT_OUT_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Considering " + GET_NAME(fdfg, curr_vertex));
820  sorted_vertices.erase(curr_vertex);
821  double current_budget = slack_time[curr_vertex];
822  double new_current_budget = current_budget;
823  if(!only_forward)
824  {
825  for(const auto& ie : fdfg->CGetInEdges(curr_vertex))
826  {
827  if(fdfg->GetSelector(ie) & DFG_SCA_SELECTOR)
828  {
829  vertex src = boost::source(ie, *fdfg);
830  if(HLS->chaining_information->may_be_chained_ops(curr_vertex, src) &&
833  {
834  if(slack_time[src] > new_current_budget)
835  {
836  slack_time[src] = new_current_budget;
838  "---Reducing slack time of " + GET_NAME(fdfg, src) + " to " + STR(slack_time[src]) +
839  " because of " + GET_NAME(fdfg, curr_vertex));
840  sorted_vertices.insert(src);
841  }
842  else if(slack_time[src] < new_current_budget)
843  {
844  new_current_budget = slack_time[src];
845  }
846  }
847  }
848  }
849  }
850  if(!only_backward)
851  {
852  for(const auto& oe : fdfg->CGetOutEdges(curr_vertex))
853  {
854  if(fdfg->GetSelector(oe) & DFG_SCA_SELECTOR)
855  {
856  vertex tgt = boost::target(oe, *fdfg);
857  if(HLS->chaining_information->may_be_chained_ops(curr_vertex, tgt))
858  {
859  if(HLS->chaining_information->get_representative_in(curr_vertex) ==
861  {
862  if(slack_time[tgt] > new_current_budget)
863  {
864  if(update_starting_time)
865  {
866  starting_time[tgt] += slack_time[tgt] - new_current_budget;
868  "---Updating starting time of " + GET_NAME(fdfg, tgt) + " to " +
869  STR(starting_time[tgt]) + " because of " + GET_NAME(fdfg, curr_vertex));
870  }
871  slack_time[tgt] = new_current_budget;
873  "---Reducing slack time of " + GET_NAME(fdfg, tgt) + " to " +
874  STR(slack_time[tgt]) + " because of " + GET_NAME(fdfg, curr_vertex));
875  sorted_vertices.insert(tgt);
876  }
877  else if(slack_time[tgt] < new_current_budget)
878  {
879  new_current_budget = slack_time[tgt];
880  }
881  }
882  }
883  }
884  }
885  }
886  if(new_current_budget < current_budget)
887  {
888  sorted_vertices.insert(curr_vertex);
889  slack_time[curr_vertex] = new_current_budget;
890  }
892  }
893  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--Updated slack starting time");
894 }
895 
896 #ifdef HC_APPROACH
897 static vertex get_src_vertex(unsigned int var_written, vertex tgt, const HLS_managerRef HLSMgr, unsigned int functionId,
898  const OpGraphConstRef fsdg)
899 {
900  InEdgeIterator ie, ie_end;
901  for(boost::tie(ie, ie_end) = boost::in_edges(tgt, *fsdg); ie != ie_end; ++ie)
902  {
903  vertex src = boost::source(*ie, *fsdg);
904  unsigned int vw = HLSMgr->get_produced_value(functionId, src);
905  if(var_written == vw)
906  return src;
907  }
908  return NULL_VERTEX;
909 }
910 #endif
911 
912 static inline bool compute_condition1(const std::string& lib_name,
913  const AllocationInformationConstRef allocation_information, double local_mux_time,
914  unsigned int fu_s1)
915 {
916  bool cond1 = local_mux_time > 0 && lib_name != WORK_LIBRARY && lib_name != PROXY_LIBRARY &&
917  allocation_information->get_number_channels(fu_s1) < 1 &&
918  allocation_information->get_worst_number_of_cycles(fu_s1) <= 1 &&
919  allocation_information->get_number_fu(fu_s1) == INFINITE_UINT;
920 
921  return cond1;
922 }
923 
924 static inline bool compute_condition2(bool cond1, unsigned long long fu_prec, double resource_area,
925  const double small_normalized_resource_area)
926 {
927  bool cond2 = cond1 && (fu_prec <= 8 || resource_area < small_normalized_resource_area);
928 
929  return cond2;
930 }
931 
933 {
934  long step_time = 0;
936  {
937  START_TIME(step_time);
938  }
939  const tree_managerRef TreeM = HLSMgr->get_tree_manager();
940  HLS->Rliv->set_HLS(HLS);
941 
942  // resource binding and allocation info
943  fu_bindingRef fu = HLS->Rfu;
944  const AllocationInformationConstRef allocation_information = HLS->allocation_information;
945 
946  cdfc_resource_ordering_functor r_functor(allocation_information);
947  double setup_hold_time = allocation_information->get_setup_hold_time();
948 
949  // pointer to a Control, Data dependence and anti-dependence graph graph
950  const FunctionBehaviorConstRef FB = HLSMgr->CGetFunctionBehavior(funId);
952 #ifdef HC_APPROACH
954 #endif
956  size_t total_modules_allocated = 0;
957  double total_resource_area = 0, total_DSPs = 0;
958  double total_area_muxes = 0;
959 
960  unsigned int fu_unit;
961  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Computing non-shared resources");
964  // HLS->Rliv->compute_conflicts_with_reachability(HLS);
965  for(const auto operation : fdfg->CGetOperations())
966  {
967  fu_unit = fu->get_assign(operation);
968  if(allocation_information->is_vertex_bounded(fu_unit))
969  {
970  continue;
971  }
972  if(n_shared_fu.find(fu_unit) == n_shared_fu.end())
973  {
974  n_shared_fu[fu_unit] = 1;
975  }
976  else
977  {
978  n_shared_fu[fu_unit] = 1 + n_shared_fu[fu_unit];
979  }
980  }
981  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--Computed non-shared resources");
983  std::map<unsigned int, OpVertexSet, cdfc_resource_ordering_functor> candidate_vertices(r_functor);
984  OpVertexSet all_candidate_vertices(fdfg);
986  for(const auto operation : fdfg->CGetOperations())
987  {
988  fu_unit = fu->get_assign(operation);
989  if(fu->get_index(operation) != INFINITE_UINT)
990  {
991  ++total_modules_allocated;
992  total_resource_area += allocation_information->get_area(fu_unit);
993  total_DSPs += allocation_information->get_DSPs(fu_unit);
994  if(!allocation_information->is_vertex_bounded(fu_unit) && n_shared_fu.find(fu_unit)->second != 1)
995  {
997  "---Easy binding for -> " + GET_NAME(sdg, operation) + "-" +
998  sdg->CGetOpNodeInfo(operation)->GetOperation() + "(" +
999  allocation_information->get_string_name(fu_unit) + ")");
1000  easy_bound_vertices[fu_unit].insert(operation);
1001  }
1002  else
1003  {
1005  "---Non-easy binding for -> " + GET_NAME(sdg, operation) + "-" +
1006  sdg->CGetOpNodeInfo(operation)->GetOperation() + "(" +
1007  allocation_information->get_string_name(fu_unit) + ")");
1008  }
1009  }
1010  else
1011  {
1012  if(allocation_information->is_vertex_bounded(fu_unit) ||
1013  (allocation_information->is_memory_unit(fu_unit) &&
1014  (!allocation_information->is_readonly_memory_unit(fu_unit) ||
1015  (!allocation_information->is_one_cycle_direct_access_memory_unit(fu_unit) &&
1016  (!parameters->isOption(OPT_rom_duplication) || !parameters->getOption<bool>(OPT_rom_duplication)))) &&
1017  allocation_information->get_number_channels(fu_unit) == 1) ||
1018  n_shared_fu.find(fu_unit)->second == 1)
1019  {
1020  ++total_modules_allocated;
1021  total_resource_area += allocation_information->get_area(fu_unit);
1022  total_DSPs += allocation_information->get_DSPs(fu_unit);
1023  fu->bind(operation, fu_unit, 0);
1024  if(allocation_information->is_memory_unit(fu_unit))
1025  {
1027  "---Easy binding for -> " + GET_NAME(sdg, operation) + "-" +
1028  sdg->CGetOpNodeInfo(operation)->GetOperation() + "(" +
1029  allocation_information->get_string_name(fu_unit) + ")");
1030  easy_bound_vertices[fu_unit].insert(operation);
1031  }
1032  else
1033  {
1035  "---Non-easy binding for -> " + GET_NAME(sdg, operation) + "-" +
1036  sdg->CGetOpNodeInfo(operation)->GetOperation() + "(" +
1037  allocation_information->get_string_name(fu_unit) + ")");
1038  }
1039  }
1040  else
1041  {
1042  if(candidate_vertices.find(fu_unit) == candidate_vertices.end())
1043  {
1044  candidate_vertices.insert(std::make_pair(fu_unit, OpVertexSet(sdg)));
1045  }
1046  candidate_vertices.find(fu_unit)->second.insert(operation);
1047  all_candidate_vertices.insert(operation);
1048  }
1049  }
1050  }
1051  long clique_cputime = 0;
1052  long falseloop_cputime = 0;
1053  long weight_cputime = 0;
1054  long slack_cputime = 0;
1055  long clique_iteration_cputime = 0;
1056  unsigned int n_performance_conflicts = 0;
1057 
1059  if(!candidate_vertices.empty())
1060  {
1061  std::vector<vertex> c2s;
1062  c2s.reserve(boost::num_vertices(*fdfg));
1064 
1065  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "Creating the cdfc for the module binding...");
1066  connection_relation con_rel;
1067  initialize_connection_relation(con_rel, all_candidate_vertices);
1068  boost_cdfc_graphRef cdfc_bulk_graph = boost_cdfc_graphRef(new boost_cdfc_graph());
1070  {
1071  START_TIME(slack_cputime);
1072  }
1073  // std::map<size_t,CustomOrderedSet<vertex> >chained_relation;
1074  std::list<vertex> sorted_vertices;
1075  sdg->TopologicalSort(sorted_vertices);
1076  CustomUnorderedMap<vertex, double> starting_time;
1079  double clock_period_resource_fraction = HLS->HLS_C->get_clock_period_resource_fraction();
1080  double actual_scheduling_clock_budget = HLS->HLS_C->get_clock_period() * clock_period_resource_fraction;
1081  const double clock_budget = CLOCK_MARGIN * actual_scheduling_clock_budget;
1083  "---clock_budget=" + STR(clock_budget) + " setup_hold_time=" + STR(setup_hold_time));
1084  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Computing starting times");
1085  for(const auto operation : sorted_vertices)
1086  {
1087  if(GET_TYPE(fdfg, operation) & (TYPE_ENTRY | TYPE_EXIT))
1088  {
1089  starting_time[operation] = 0.0;
1090  }
1091  else
1092  {
1093  const auto statement_index = fdfg->CGetOpNodeInfo(operation)->GetNodeId();
1094  starting_time[operation] = HLS->Rsch->GetStartingTime(statement_index) -
1095  (from_strongtype_cast<double>(HLS->Rsch->get_cstep(statement_index).second) *
1096  actual_scheduling_clock_budget);
1097  }
1098  }
1099  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--Computed starting times");
1100  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Computing Latest ending times");
1101  for(const auto operation : boost::adaptors::reverse(sorted_vertices))
1102  {
1104  double delay = HLS->Rsch->get_fo_correction(fdfg->CGetOpNodeInfo(operation)->GetNodeId(), 0);
1105  double current_ending_time;
1106  if(GET_TYPE(fdfg, operation) & (TYPE_ENTRY | TYPE_EXIT))
1107  {
1108  current_ending_time = 0;
1109  }
1110  else
1111  {
1112  const auto statement_index = fdfg->CGetOpNodeInfo(operation)->GetNodeId();
1113  auto fu_type = fu->get_assign(operation);
1114  const auto ii_time = allocation_information->get_initiation_time(fu_type, statement_index);
1115  const auto n_cycles = allocation_information->get_cycles(fu_type, statement_index);
1116  if(ii_time != (0u))
1117  {
1118  current_ending_time = delay + HLS->Rsch->GetEndingTime(statement_index) -
1119  (from_strongtype_cast<double>(HLS->Rsch->get_cstep_end(statement_index).second) *
1120  actual_scheduling_clock_budget) +
1121  starting_time.find(operation)->second;
1122  }
1123  else if(n_cycles > 1)
1124  {
1125  current_ending_time = clock_budget - setup_hold_time;
1126  }
1127  else
1128  {
1129  current_ending_time = delay + HLS->Rsch->GetEndingTime(statement_index) -
1130  (from_strongtype_cast<double>(HLS->Rsch->get_cstep_end(statement_index).second) *
1131  actual_scheduling_clock_budget);
1132  }
1133  }
1135  "---current_ending_time for " + GET_NAME(sdg, operation) + "=" + STR(current_ending_time));
1136  double current_budget = clock_budget - current_ending_time - setup_hold_time;
1138  "---Initial Current_budget/Slack for " + GET_NAME(sdg, operation) + "=" + STR(current_budget));
1139  if(current_budget < 0.0)
1140  {
1141  current_budget = 0.0;
1142  }
1143  OutEdgeIterator oe, oe_end;
1144  for(boost::tie(oe, oe_end) = boost::out_edges(operation, *fdfg); oe != oe_end; ++oe)
1145  {
1146  if(fdfg->GetSelector(*oe) & DFG_SCA_SELECTOR)
1147  {
1148  vertex tgt = boost::target(*oe, *fdfg);
1150  {
1153  {
1154  current_ending_time = std::max(ending_time[tgt], current_ending_time);
1155  current_budget = std::min(current_budget, slack_time[tgt]);
1156  }
1157  }
1158  }
1159  }
1160  ending_time[operation] = current_ending_time;
1162  "---Current_budget/Slack for " + GET_NAME(sdg, operation) + "=" + STR(current_budget));
1163  slack_time[operation] = current_budget;
1164  }
1165  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--Computed Latest ending times and slacks");
1166  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Starting time *** Latest ending time *** Slacks");
1167 #ifndef NDEBUG
1168  for(const auto operation : sdg->CGetOperations())
1169  {
1171  "---" + GET_NAME(sdg, operation) +
1172  " *** starting_time=" + STR(starting_time.find(operation)->second) +
1173  " *** latest_ending_time=" + STR(ending_time.find(operation)->second) +
1174  " *** slack_time=" + STR(slack_time.find(operation)->second));
1175  THROW_ASSERT(ending_time.find(operation)->second >= starting_time.find(operation)->second,
1176  "wrong starting/ending for operation " + GET_NAME(sdg, operation));
1177  }
1178 #endif
1180  OpVertexSet to_update(fdfg);
1181  to_update.insert(sorted_vertices.begin(), sorted_vertices.end());
1182 
1183  update_slack_starting_time(fdfg, to_update, slack_time, starting_time, false, false, false);
1184 
1186  "-->Updated Starting time *** Latest ending time *** Slacks");
1187 #ifndef NDEBUG
1188  for(const auto operation : sdg->CGetOperations())
1189  {
1191  "---" + GET_NAME(sdg, operation) +
1192  " *** starting_time=" + STR(starting_time.find(operation)->second) +
1193  " *** latest_ending_time=" + STR(ending_time.find(operation)->second) +
1194  " *** slack_time=" + STR(slack_time.find(operation)->second));
1195  }
1196 #endif
1198 
1200  {
1201  STOP_TIME(slack_cputime);
1203  "---Slack computed in " + print_cpu_time(slack_cputime) + " seconds");
1204  }
1205 
1206  boost::graph_traits<graph>::vertices_size_type n_vert = boost::num_vertices(*fdfg);
1207  std::vector<boost::graph_traits<OpGraph>::vertices_size_type> rank_map(n_vert);
1208  std::vector<vertex> pred_map(n_vert);
1209  using const_vertex_index_pmap_t = boost::property_map<OpGraph, boost::vertex_index_t>::const_type;
1210  const_vertex_index_pmap_t cindex_pmap = boost::get(boost::vertex_index_t(), *fdfg);
1212  using op_rank_pmap_type =
1213  boost::iterator_property_map<std::vector<boost::graph_traits<OpGraph>::vertices_size_type>::iterator,
1214  const_vertex_index_pmap_t>;
1215  op_rank_pmap_type rank_pmap = boost::make_iterator_property_map(rank_map.begin(), cindex_pmap, rank_map[0]);
1217  using op_pred_pmap_type = boost::iterator_property_map<std::vector<vertex>::iterator, const_vertex_index_pmap_t>;
1218  op_pred_pmap_type pred_pmap = boost::make_iterator_property_map(pred_map.begin(), cindex_pmap, pred_map[0]);
1219  boost::disjoint_sets<op_rank_pmap_type, op_pred_pmap_type> ds(rank_pmap, pred_pmap);
1220  VertexIterator vi, vi_end;
1221  for(boost::tie(vi, vi_end) = boost::vertices(*fdfg); vi != vi_end; ++vi)
1222  {
1223  vertex s = *vi;
1224  ds.make_set(s);
1225  }
1226 
1228  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---merge easy bound vertices");
1229  for(const auto& fu_eb : easy_bound_vertices)
1230  {
1231  std::map<unsigned int, vertex> rep_vertex;
1232  for(const auto& cur_v : fu_eb.second)
1233  {
1234  auto vertex_index = fu->get_index(cur_v);
1235  if(rep_vertex.find(vertex_index) == rep_vertex.end())
1236  {
1237  rep_vertex[vertex_index] = cur_v;
1238  }
1239  else
1240  {
1241  vertex rep = rep_vertex.find(vertex_index)->second;
1242  ds.union_set(cur_v, rep);
1243  starting_time[cur_v] = starting_time[rep] = std::max(starting_time[rep], starting_time[cur_v]);
1244  ending_time[cur_v] = ending_time[rep] = std::max(ending_time[rep], ending_time[cur_v]);
1245  }
1246  }
1247  }
1248 
1250  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---add the vertices to the cdfc graph");
1251  for(boost::tie(vi, vi_end) = boost::vertices(*fdfg); vi != vi_end; ++vi)
1252  {
1253  vertex s = *vi;
1254  vertex rep = ds.find_set(s);
1255  cdfc_vertex C;
1256  if(rep == s && s2c.find(rep) == s2c.end())
1257  {
1258  C = boost::add_vertex(*cdfc_bulk_graph);
1259  THROW_ASSERT(boost::get(boost::vertex_index, *cdfc_bulk_graph, C) == c2s.size(), "unexpected case");
1260  c2s.push_back(rep);
1261  }
1262  else if(s2c.find(rep) == s2c.end())
1263  {
1264  C = boost::add_vertex(*cdfc_bulk_graph);
1265  THROW_ASSERT(boost::get(boost::vertex_index, *cdfc_bulk_graph, C) == c2s.size(), "unexpected case");
1266  c2s.push_back(rep);
1267  s2c[rep] = C;
1268  }
1269  else
1270  {
1271  C = s2c[rep];
1272  }
1273  s2c[s] = C;
1274  }
1275 
1278  "---add the control dependencies edges and the chained edges to the cdfc graph");
1280  EdgeIterator ei, ei_end;
1281  for(boost::tie(ei, ei_end) = boost::edges(*sdg); ei != ei_end; ++ei)
1282  {
1283  vertex src = boost::source(*ei, *sdg);
1284  auto fu_unit_src = fu->get_assign(src);
1285  const auto II_src = allocation_information->get_initiation_time(fu_unit_src, src);
1286  vertex tgt = boost::target(*ei, *sdg);
1288  && II_src == 0
1289  )
1290  {
1291  cdfc_vertex cdfc_src = s2c[src];
1292  cdfc_vertex cdfc_tgt = s2c[tgt];
1293  if(cdfc_src != cdfc_tgt)
1294  {
1295  bool exists;
1296  cdfc_edge E;
1297  boost::tie(E, exists) = boost::edge(cdfc_src, cdfc_tgt, *cdfc_bulk_graph);
1298  // std::cerr << "Chained " << cdfc_src << "-" << cdfc_tgt << " -- " << GET_NAME(sdg, src) << "-" <<
1299  // GET_NAME(sdg, tgt) << std::endl;
1300  if(!exists)
1301  {
1302  boost::tie(E, exists) =
1303  boost::add_edge(cdfc_src, cdfc_tgt, edge_cdfc_selector(CD_EDGE), *cdfc_bulk_graph);
1304  THROW_ASSERT(exists, "already inserted edge");
1305  }
1306  }
1307  else
1308  {
1309  THROW_ERROR(std::string(GET_NAME(sdg, src)) + "(" + sdg->CGetOpNodeInfo(src)->GetOperation() + ")--" +
1310  GET_NAME(sdg, tgt) + "(" + sdg->CGetOpNodeInfo(tgt)->GetOperation() + ")");
1311  }
1312  }
1313  }
1314 
1315  double clock_cycle = HLS->HLS_C->get_clock_period() * CLOCK_MARGIN * clock_period_resource_fraction;
1316 
1317 #ifdef HC_APPROACH
1318  spec_hierarchical_clustering hc;
1319 
1321  for(const auto& fu_cv : candidate_vertices)
1322  {
1323  for(const auto& cv : fu_cv.second)
1324  {
1325  unsigned int fu_s1 = fu_cv.first;
1326  const double mux_time = MODULE_BINDING_MUX_MARGIN * allocation_information->estimate_mux_time(fu_s1);
1327  if(!can_be_clustered(cv, fsdg, fu, slack_time, mux_time))
1328  {
1329  // fu_s1 = fu->get_assign(cv);
1330  // double exec_time = allocation_information->get_worst_execution_time(fu_s1);
1331  // double stage_time = allocation_information->get_worst_stage_period(fu_s1);
1332  // double resource_area = allocation_information->compute_normalized_area(fu_s1, vars_read1.size());
1333  // std::cerr << "NOPERATION=" << fsdg->CGetOpNodeInfo(cv)->GetOperation() << " area " << resource_area <<
1334  // "-" << has_a_constant_in << " e=" << exec_time << " s=" << stage_time << std::endl;
1335  continue;
1336  }
1337  double resource_area = allocation_information->compute_normalized_area(fu_s1);
1338  resource_area += allocation_information->get_DSPs(fu_s1);
1339 
1340  // double exec_time = allocation_information->get_worst_execution_time(fu_s1);
1341  // double stage_time = allocation_information->get_worst_stage_period(fu_s1);
1342  hc.add_vertex(boost::get(boost::vertex_index, *fsdg, cv), GET_NAME(fsdg, cv), fu->get_assign(cv),
1343  resource_area);
1344  }
1345  }
1347  for(const auto& fu_cv : candidate_vertices)
1348  {
1349  const CustomOrderedSet<vertex>::const_iterator cv_it_end = fu_cv.second.end();
1350  for(CustomOrderedSet<vertex>::const_iterator cv_it = fu_cv.second.begin(); cv_it != cv_it_end;)
1351  {
1353  ++cv_it;
1354  for(CustomOrderedSet<vertex>::const_iterator cv2_it = cv_it; cv2_it != cv_it_end; ++cv2_it)
1355  {
1356  if(!can_be_clustered(*cv1_it, fsdg, fu, slack_time, 0))
1357  continue;
1358  if(!can_be_clustered(*cv2_it, fsdg, fu, slack_time, 0))
1359  continue;
1360  if(clock_cycle <
1361  (setup_hold_time + starting_time[*cv1_it] + ending_time[*cv2_it] - starting_time[*cv2_it]))
1362  hc.add_tabu_pair(boost::get(boost::vertex_index, *fsdg, *cv1_it),
1363  boost::get(boost::vertex_index, *fsdg, *cv2_it));
1364  if(clock_cycle <
1365  (setup_hold_time + starting_time[*cv2_it] + ending_time[*cv1_it] - starting_time[*cv1_it]))
1366  hc.add_tabu_pair(boost::get(boost::vertex_index, *fsdg, *cv1_it),
1367  boost::get(boost::vertex_index, *fsdg, *cv2_it));
1368  if(HLS->Rliv->are_in_conflict(*cv1_it, *cv2_it))
1369  hc.add_tabu_pair(boost::get(boost::vertex_index, *fsdg, *cv1_it),
1370  boost::get(boost::vertex_index, *fsdg, *cv2_it));
1371  // else if(allocation_information->get_worst_execution_time(fu_s1)==0)
1372  // hc.add_tabu_pair(boost::get(boost::vertex_index, *fsdg, *cv1_it), boost::get(boost::vertex_index,
1373  // *fsdg, *cv2_it));
1374  }
1375  }
1376  }
1378  for(const auto& fu_cv : candidate_vertices)
1379  {
1380  for(const auto& cv : fu_cv.second)
1381  {
1382  unsigned int fu_s1 = fu_cv.first;
1383  const double mux_time = MODULE_BINDING_MUX_MARGIN * allocation_information->estimate_mux_time(fu_s1);
1384  if(!can_be_clustered(cv, fsdg, fu, slack_time, mux_time))
1385  continue;
1386  std::vector<HLS_manager::io_binding_type> vars_read1 = HLSMgr->get_required_values(HLS->functionId, cv);
1387  unsigned int index = 0;
1388  for(auto var_pair : vars_read1)
1389  {
1390  unsigned int var_written = std::get<0>(var_pair);
1391  vertex tgt = cv;
1392  vertex src = get_src_vertex(var_written, tgt, HLSMgr, HLS->functionId, fsdg);
1393  if(src != NULL_VERTEX)
1394  {
1395  if(all_candidate_vertices.find(src) != all_candidate_vertices.end() &&
1396  can_be_clustered(src, fsdg, fu, slack_time, 0) &&
1398  hc.add_edge(boost::get(boost::vertex_index, *fsdg, src),
1399  boost::get(boost::vertex_index, *fsdg, tgt),
1400  2 * index + (HLS->chaining_information->may_be_chained_ops(src, tgt) ? 1 : 0));
1401  }
1402  ++index;
1403  }
1404  }
1405  }
1406 #if 0
1407  for(tie(ei, ei_end) = boost::edges(*fsdg); ei != ei_end; ++ei)
1408  {
1409  vertex src = boost::source(*ei, *fsdg);
1410  if(!can_be_clustered(src, fsdg, fu, slack_time, 0)) continue;
1411  if(all_candidate_vertices.find(src) == all_candidate_vertices.end()) continue;
1412  vertex tgt = boost::target(*ei, *fsdg);
1413  if(!can_be_clustered(tgt, fsdg, fu, slack_time, 0)) continue;
1414  if(all_candidate_vertices.find(tgt) == all_candidate_vertices.end()) continue;
1416  hc.add_edge(boost::get(boost::vertex_index, *fsdg, src), boost::get(boost::vertex_index, *fsdg, tgt), 2*get_src_vertex(src, tgt, HLSMgr, HLS->functionId)+(HLS->chaining_information->may_be_chained_ops(src, tgt)?1:0));
1417  }
1418 #endif
1419 
1420  hc.exec();
1421 #endif
1422 
1424  {
1425  START_TIME(weight_cputime);
1426  }
1427 
1428  int _w = 1;
1429 
1430  // Do a preliminary register binding to help the sharing of complex operations
1432  "---Do a preliminary register binding to help the sharing of complex operations");
1433  if(parameters->getOption<bool>(OPT_shared_input_registers))
1434  {
1435  DesignFlowStepRef regb;
1436  if(parameters->getOption<HLSFlowStep_Type>(OPT_register_allocation_algorithm) ==
1438  {
1439  regb =
1440  GetPointer<const HLSFlowStepFactory>(design_flow_manager.lock()->CGetDesignFlowStepFactory("HLS"))
1441  ->CreateHLSFlowStep(
1444  parameters->getOption<CliqueCovering_Algorithm>(OPT_weighted_clique_register_algorithm))));
1445  }
1446  else
1447  {
1448  regb = GetPointer<const HLSFlowStepFactory>(design_flow_manager.lock()->CGetDesignFlowStepFactory("HLS"))
1449  ->CreateHLSFlowStep(parameters->getOption<HLSFlowStep_Type>(OPT_register_allocation_algorithm),
1450  funId);
1451  }
1452  regb->Initialize();
1453  regb->Exec();
1454  }
1455 
1456  for(const auto& fu_cv : candidate_vertices)
1457  {
1458  auto fu_s1 = fu_cv.first;
1460  "-->Considering fu " + allocation_information->get_fu_name(fu_s1).first);
1461 
1462  std::string res_name = allocation_information->get_fu_name(fu_s1).first;
1463  std::string lib_name = HLS->HLS_D->get_technology_manager()->get_library(res_name);
1464  const double mux_time = MODULE_BINDING_MUX_MARGIN * allocation_information->estimate_mux_time(fu_s1);
1465  double controller_delay = allocation_information->EstimateControllerDelay();
1466  double resource_area = allocation_information->compute_normalized_area(fu_s1);
1467  bool disabling_slack_based_binding =
1468  ((allocation_information->get_number_channels(fu_s1) >= 1) and
1469  (!allocation_information->is_readonly_memory_unit(fu_s1) ||
1470  (!parameters->isOption(OPT_rom_duplication) || !parameters->getOption<bool>(OPT_rom_duplication)))) ||
1471  lib_name == WORK_LIBRARY || lib_name == PROXY_LIBRARY ||
1472  allocation_information->get_number_fu(fu_s1) != INFINITE_UINT;
1473  const auto local_mux_time =
1474  (disabling_slack_based_binding ? -std::numeric_limits<double>::infinity() : mux_time);
1475  const auto fu_prec = allocation_information->get_prec(fu_s1);
1476  const auto cond1 = compute_condition1(lib_name, allocation_information, local_mux_time, fu_s1);
1477  const auto cond2 = compute_condition2(cond1, fu_prec, resource_area, small_normalized_resource_area);
1478 
1479  const auto cv_it_end = fu_cv.second.end();
1480  for(auto cv_it = fu_cv.second.begin(); cv_it != cv_it_end;)
1481  {
1482  auto cv1_it = cv_it;
1483  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->First operation is " + GET_NAME(sdg, *cv1_it));
1484  ++cv_it;
1485  for(auto cv2_it = cv_it; cv2_it != cv_it_end; ++cv2_it)
1486  {
1487  if(HLS->Rliv->are_in_conflict(*cv1_it, *cv2_it))
1488  {
1490  "---Conflict between operations " + GET_NAME(sdg, *cv1_it) + "(" +
1491  sdg->CGetOpNodeInfo(*cv1_it)->GetOperation() + ")" + GET_NAME(sdg, *cv2_it) + "(" +
1492  sdg->CGetOpNodeInfo(*cv2_it)->GetOperation() + ") (" +
1493  allocation_information->get_string_name(fu_s1) + ")");
1494  continue;
1495  }
1496  if(!disabling_slack_based_binding && !allocation_information->is_proxy_unit(fu_s1) &&
1497  allocation_information->get_number_channels(fu_s1) < 2 &&
1498  allocation_information->get_cycles(fu_s1, *cv1_it, sdg) <= 1 &&
1499  allocation_information->get_cycles(fu_s1, *cv2_it, sdg) <= 1)
1500  {
1501  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---Checking for execution frames");
1502  if(clock_cycle <
1503  (setup_hold_time + starting_time[*cv1_it] + ending_time[*cv2_it] - starting_time[*cv2_it]))
1504  {
1506  "---Performance based problem in sharing " + GET_NAME(sdg, *cv1_it) + "(" +
1507  sdg->CGetOpNodeInfo(*cv1_it)->GetOperation() + ")" + GET_NAME(sdg, *cv2_it) +
1508  "(" + sdg->CGetOpNodeInfo(*cv2_it)->GetOperation() + ")=" + STR(clock_cycle) +
1509  "<" +
1510  STR(setup_hold_time + starting_time[*cv1_it] + ending_time[*cv2_it] -
1511  starting_time[*cv2_it]));
1512  ++n_performance_conflicts;
1513  continue;
1514  }
1515  if(clock_cycle <
1516  (setup_hold_time + starting_time[*cv2_it] + ending_time[*cv1_it] - starting_time[*cv1_it]))
1517  {
1519  "---Performance based problem in sharing " + GET_NAME(sdg, *cv1_it) + "(" +
1520  sdg->CGetOpNodeInfo(*cv1_it)->GetOperation() + ")" + GET_NAME(sdg, *cv2_it) +
1521  "(" + sdg->CGetOpNodeInfo(*cv2_it)->GetOperation() + ")=" + STR(clock_cycle) +
1522  "<" +
1523  STR(setup_hold_time + starting_time[*cv2_it] + ending_time[*cv1_it] -
1524  starting_time[*cv1_it]));
1525  ++n_performance_conflicts;
1526  continue;
1527  }
1528  }
1529  if(GET_TYPE(dfg, *cv1_it) &
1531  {
1532  continue;
1533  }
1534  if(GET_TYPE(dfg, *cv2_it) &
1536  {
1537  continue;
1538  }
1539 
1540  _w = weight_computation(cond1, cond2, *cv1_it, *cv2_it, local_mux_time, dfg, fu, slack_time,
1541  starting_time,
1542 #ifdef HC_APPROACH
1543  hc,
1544 #endif
1545  con_rel, controller_delay, fu_prec);
1546  // std::cerr << "possible sharing between " << GET_NAME(sdg, *cv1_it) << " and " << GET_NAME(sdg,
1547  // *cv2_it) << ", but.... maybe next time " << _w << " " << mux_time << " -- " + STR(s2c[*cv1_it]) +
1548  // "->" + STR(s2c[*cv2_it])<< std::endl;
1549 
1551  if(_w > 0)
1552  {
1553  cdfc_edge E;
1554  bool exists;
1555  boost::tie(E, exists) = boost::edge(s2c[*cv1_it], s2c[*cv2_it], *cdfc_bulk_graph);
1556  if(!exists)
1557  {
1558  boost::tie(E, exists) = boost::add_edge(
1559  s2c[*cv1_it], s2c[*cv2_it], edge_cdfc_selector(COMPATIBILITY_EDGE, _w), *cdfc_bulk_graph);
1560  THROW_ASSERT(exists, "already inserted edge " + GET_NAME(sdg, *cv1_it) + " - " +
1561  GET_NAME(sdg, *cv2_it) + " -- " + STR(s2c[*cv1_it]) + "->" +
1562  STR(s2c[*cv2_it]));
1563  }
1564  else
1565  {
1566  THROW_ERROR("already inserted edge " + GET_NAME(sdg, *cv1_it) + " - " + GET_NAME(sdg, *cv2_it) +
1567  " -- " + STR(s2c[*cv1_it]) + "->" + STR(s2c[*cv2_it]));
1568  }
1569 
1570  boost::tie(E, exists) = boost::edge(s2c[*cv2_it], s2c[*cv1_it], *cdfc_bulk_graph);
1571  if(!exists)
1572  {
1573  boost::tie(E, exists) = boost::add_edge(
1574  s2c[*cv2_it], s2c[*cv1_it], edge_cdfc_selector(COMPATIBILITY_EDGE, _w), *cdfc_bulk_graph);
1575  THROW_ASSERT(exists, "already inserted edge " + GET_NAME(sdg, *cv2_it) + " - " +
1576  GET_NAME(sdg, *cv1_it) + " -- " + STR(s2c[*cv2_it]) + "->" +
1577  STR(s2c[*cv1_it]));
1578  }
1579  else
1580  {
1581  THROW_ERROR("already inserted edge " + GET_NAME(sdg, *cv2_it) + " - " + GET_NAME(sdg, *cv1_it) +
1582  " -- " + STR(s2c[*cv2_it]) + "->" + STR(s2c[*cv1_it]));
1583  }
1584  }
1585  else
1586  {
1588  "Null compatibility weight for " + GET_NAME(sdg, *cv1_it) + "(" +
1589  sdg->CGetOpNodeInfo(*cv1_it)->GetOperation() + ")" + GET_NAME(sdg, *cv2_it) + "(" +
1590  sdg->CGetOpNodeInfo(*cv2_it)->GetOperation() + ") (" +
1591  allocation_information->get_string_name(fu_s1) + ")");
1592  }
1593  }
1595  }
1597  "<--Considered fu " + allocation_information->get_fu_name(fu_s1).first);
1598  }
1600  {
1601  STOP_TIME(weight_cputime);
1603  "---Weight computation completed in " + print_cpu_time(weight_cputime) + " seconds");
1604  }
1605 
1606  const cdfc_graphRef CG = cdfc_graphRef(new cdfc_graph(
1607  *cdfc_bulk_graph, cdfc_graph_edge_selector<boost_cdfc_graph>(COMPATIBILITY_EDGE, &*cdfc_bulk_graph),
1609  const cdfc_graphConstRef CD_chained_graph = cdfc_graphConstRef(
1610  new cdfc_graph(*cdfc_bulk_graph, cdfc_graph_edge_selector<boost_cdfc_graph>(CD_EDGE, &*cdfc_bulk_graph),
1612  std::string functionName = FB->CGetBehavioralHelper()->get_function_name();
1613  if(parameters->getOption<bool>(OPT_print_dot))
1614  {
1615  // CD_chained_graph->WriteDot("HLS_CD_CHAINED.dot");
1616  }
1618  std::vector<int> cd_levels;
1619  cd_levels.resize(boost::num_vertices(*CD_chained_graph));
1621  std::deque<size_t> Csorted_vertices;
1622  topological_based_sorting(*CD_chained_graph, c2s, sdg, std::front_inserter(Csorted_vertices));
1623  auto sv_it_end = Csorted_vertices.end();
1624  for(auto sv_it = Csorted_vertices.begin(); sv_it != sv_it_end; ++sv_it)
1625  {
1626  cd_levels[*sv_it] = 0;
1627  cdfc_in_edge_iterator ie, ie_end;
1628  for(boost::tie(ie, ie_end) = boost::in_edges(*sv_it, *CD_chained_graph); ie != ie_end; ++ie)
1629  {
1630  if(boost::in_degree(boost::source(*ie, *CD_chained_graph), *CG) != 0)
1631  {
1632  cd_levels[*sv_it] =
1633  std::max(cd_levels[*sv_it], 1 + cd_levels[boost::get(boost::vertex_index, *CD_chained_graph,
1634  boost::source(*ie, *CD_chained_graph))]);
1635  }
1636  else
1637  {
1638  cd_levels[*sv_it] =
1639  std::max(cd_levels[*sv_it], cd_levels[boost::get(boost::vertex_index, *CD_chained_graph,
1640  boost::source(*ie, *CD_chained_graph))]);
1641  }
1642  }
1643  }
1644 
1646  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---remove all cycles from the cdfc graph");
1648  *cdfc_bulk_graph, cdfc_graph_edge_selector<boost_cdfc_graph>(CD_EDGE | COMPATIBILITY_EDGE, &*cdfc_bulk_graph),
1650 
1652  {
1653  START_TIME(falseloop_cputime);
1654  }
1655 
1656  unsigned int k = 2;
1657  std::deque<cdfc_edge> candidate_edges;
1658  CustomUnorderedSet<vertex> no_cycles;
1659  bool restart;
1660 
1661  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Looking for loops in compatibility graphs");
1662  do
1663  {
1664  restart = false;
1665  for(const auto candidate : all_candidate_vertices)
1666  {
1667  if(no_cycles.find(candidate) != no_cycles.end())
1668  {
1669  continue;
1670  }
1671  cdfc_vertex start = s2c[candidate];
1672 
1673  if(cd_levels[boost::get(boost::vertex_index, *CG, start)] != 0 && boost::in_degree(start, *CG) != 0)
1674  {
1675  bool found_a_loop;
1677  "-->Search loops starting from -> " + GET_NAME(sdg, candidate) + " iteration " + STR(k));
1678  found_a_loop = false_loop_search(start, k, cdfc, CG, candidate_edges);
1679  if(!found_a_loop)
1680  {
1681  no_cycles.insert(candidate);
1682  }
1683  restart |= found_a_loop;
1684  THROW_ASSERT(!found_a_loop || candidate_edges.size() >= 1, "something of unexpected happen");
1685  while(found_a_loop)
1686  {
1689  const auto ce_it_end = candidate_edges.end();
1690  auto ce_it = candidate_edges.begin();
1691  cdfc_edge cand_e = *ce_it;
1692  ++ce_it;
1693  cdfc_vertex cand_src = boost::source(cand_e, *CG);
1694  cdfc_vertex cand_tgt = boost::target(cand_e, *CG);
1695  int cand_level_difference = std::abs(cd_levels[boost::get(boost::vertex_index, *CG, cand_src)] -
1696  cd_levels[boost::get(boost::vertex_index, *CG, cand_tgt)]);
1697  size_t cand_out_degree = boost::out_degree(cand_src, *CG) + boost::out_degree(cand_tgt, *CG);
1698  int cand_edge_weight = (*CG)[cand_e].weight;
1701  "-->Analyzing compatibility between operations " +
1702  GET_NAME(sdg, c2s[boost::get(boost::vertex_index, *CG, cand_src)]) + "(" +
1703  sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, *CG, cand_src)])->GetOperation() +
1704  ")" + " and " + GET_NAME(sdg, c2s[boost::get(boost::vertex_index, *CG, cand_tgt)]) + "(" +
1705  sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, *CG, cand_tgt)])->GetOperation() +
1706  ")" + " - ld = " + STR(cand_level_difference) + " - d= " + STR(cand_out_degree) +
1707  " - w = " + STR(cand_edge_weight));
1708 
1709  for(; ce_it != ce_it_end; ++ce_it)
1710  {
1711  cdfc_edge e = *ce_it;
1712  cdfc_vertex src = boost::source(e, *CG);
1713  cdfc_vertex tgt = boost::target(e, *CG);
1714  int level_difference = std::abs(cd_levels[boost::get(boost::vertex_index, *CG, src)] -
1715  cd_levels[boost::get(boost::vertex_index, *CG, tgt)]);
1716  size_t out_degree = boost::out_degree(src, *CG) + boost::out_degree(tgt, *CG);
1717  int edge_weight = (*CG)[e].weight;
1720  "---Analyzing compatibility between operations " +
1721  GET_NAME(sdg, c2s[boost::get(boost::vertex_index, *CG, src)]) + "(" +
1722  sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, *CG, src)])->GetOperation() + ")" +
1723  " and " + GET_NAME(sdg, c2s[boost::get(boost::vertex_index, *CG, tgt)]) + "(" +
1724  sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, *CG, tgt)])->GetOperation() + ")" +
1725  " - ld = " + STR(level_difference) + " - d= " + STR(out_degree) +
1726  " - w = " + STR(edge_weight));
1727  if(level_difference > cand_level_difference ||
1728  (level_difference == cand_level_difference && out_degree > cand_out_degree) ||
1729  (level_difference == cand_level_difference && out_degree == cand_out_degree &&
1730  edge_weight < cand_edge_weight))
1731  {
1732  cand_src = src;
1733  cand_tgt = tgt;
1734  cand_e = e;
1735  cand_level_difference = level_difference;
1736  cand_out_degree = out_degree;
1737  cand_edge_weight = edge_weight;
1738  }
1739  }
1742  // boost::remove_edge(cand_e, *cdfc_bulk_graph);
1743  (*cdfc_bulk_graph)[cand_e].selector = 0;
1744  bool exists;
1745  boost::tie(cand_e, exists) = boost::edge(cand_tgt, cand_src, *CG);
1746  THROW_ASSERT(exists, "edge already removed");
1747  (*cdfc_bulk_graph)[cand_e].selector = 0;
1750  "---Removed compatibility between operations " +
1751  GET_NAME(sdg, c2s[boost::get(boost::vertex_index, *CG, cand_src)]) + "(" +
1752  sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, *CG, cand_src)])->GetOperation() +
1753  ")" + " and " + GET_NAME(sdg, c2s[boost::get(boost::vertex_index, *CG, cand_tgt)]) + "(" +
1754  sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, *CG, cand_tgt)])->GetOperation() +
1755  ")");
1756  candidate_edges.clear();
1757 
1759  found_a_loop = false_loop_search(start, k, cdfc, CG, candidate_edges);
1760 
1761  // std::cerr << "2 Search loops starting from -> " + GET_NAME(sdg, candidate) + " iteration " + STR(k)
1762  // << std::endl;
1763  }
1765  "<--Searched loops starting from -> " + GET_NAME(sdg, candidate) + " iteration " +
1766  STR(k));
1767  THROW_ASSERT(candidate_edges.empty(), "candidate_cycle has to be empty");
1768  }
1769  else
1770  {
1771  no_cycles.insert(candidate);
1772  }
1773  }
1774  ++k;
1775  } while(restart);
1776  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--Looked for loops in compatibility graphs");
1777 
1779  {
1780  STOP_TIME(falseloop_cputime);
1782  "---False-loop computation completed in " + print_cpu_time(falseloop_cputime) + " seconds");
1783  }
1784 
1785  if(parameters->getOption<bool>(OPT_print_dot))
1786  {
1787  // cdfc->WriteDot("HLS_CD_COMP.dot");
1788  }
1789 
1790  CustomUnorderedMap<vertex, vertex> identity_converter;
1791 
1794  "---partition vertices for clique covering or bind the easy functional units");
1795  std::map<unsigned int, unsigned int> numModule;
1796  std::map<unsigned int, CustomOrderedSet<cdfc_vertex>, cdfc_resource_ordering_functor> partitions(r_functor);
1797  for(const auto& fu_cv : candidate_vertices)
1798  {
1799  fu_unit = fu_cv.first;
1800  for(const auto cv : fu_cv.second)
1801  {
1803  if(boost::out_degree(s2c[cv], *CG) == 0)
1804  {
1805  unsigned int num = 0;
1806  if(numModule.find(fu_unit) == numModule.end())
1807  {
1808  numModule[fu_unit] = 1;
1809  }
1810  else
1811  {
1812  num = numModule[fu_unit]++;
1813  }
1814  ++total_modules_allocated;
1815  total_resource_area += allocation_information->get_area(fu_unit);
1816  total_DSPs += allocation_information->get_DSPs(fu_unit);
1817  fu->bind(cv, fu_unit, num);
1818  }
1819  else
1820  {
1821  partitions[fu_unit].insert(s2c[cv]);
1822  identity_converter[cv] = cv;
1823  }
1824  }
1825  }
1826 
1828  {
1829  START_TIME(clique_cputime);
1830  }
1831 
1833  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---solve the binding problem for all the partitions");
1834  const unsigned int number_of_iterations = n_vert > OP_THRESHOLD ? 2 : 10;
1835  const std::map<unsigned int, unsigned int> numModule_initial = numModule;
1836  const size_t total_modules_allocated_initial = total_modules_allocated;
1837  const double total_resource_area_initial = total_resource_area;
1838  double total_resource_area_prev = total_resource_area;
1839  const double total_DSPs_initial = total_DSPs;
1840  double total_DSPs_prev = total_DSPs;
1841  const double total_area_muxes_initial = total_area_muxes;
1842  double total_area_muxes_prev = total_area_muxes;
1843  const CustomUnorderedMap<vertex, double> slack_time_initial = slack_time;
1844  const CustomUnorderedMap<vertex, double> starting_time_initial = starting_time;
1845 
1846  fu_bindingRef fu_best;
1847  if(parameters->getOption<int>(OPT_memory_banks_number) > 1 && !parameters->isOption(OPT_context_switch))
1848  {
1849  fu_best = fu_bindingRef(new ParallelMemoryFuBinding(HLSMgr, funId, parameters));
1850  }
1851  else
1852  {
1854  }
1855  double total_area_best = 0;
1856  size_t total_modules_allocated_best = 0;
1857  double total_resource_area_best = 0;
1858  double total_area_muxes_best = 0;
1859  double total_DSPs_best = 0;
1860 
1861  for(unsigned int iteration = 0; iteration < number_of_iterations; ++iteration)
1862  {
1863  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---Running iteration " + STR(iteration));
1864  if(iteration > 0)
1865  {
1866  if(iteration > 1 && total_resource_area == total_resource_area_prev && total_DSPs == total_DSPs_prev &&
1867  total_area_muxes == total_area_muxes_prev)
1868  {
1869  break;
1870  }
1871  numModule = numModule_initial;
1872  total_modules_allocated = total_modules_allocated_initial;
1873  total_resource_area_prev = total_resource_area;
1874  total_resource_area = total_resource_area_initial;
1875  total_DSPs_prev = total_DSPs;
1876  total_DSPs = total_DSPs_initial;
1877  slack_time = slack_time_initial;
1878  starting_time = starting_time_initial;
1879  total_area_muxes_prev = total_area_muxes;
1880  total_area_muxes = total_area_muxes_initial;
1881 
1882  {
1883  DesignFlowStepRef regb;
1884  // if(iteration%2)
1885  if(parameters->getOption<HLSFlowStep_Type>(OPT_register_allocation_algorithm) ==
1887  {
1888  regb =
1889  GetPointer<const HLSFlowStepFactory>(design_flow_manager.lock()->CGetDesignFlowStepFactory("HLS"))
1890  ->CreateHLSFlowStep(
1894  OPT_weighted_clique_register_algorithm))));
1895  }
1896  else
1897  {
1898  regb =
1899  GetPointer<const HLSFlowStepFactory>(design_flow_manager.lock()->CGetDesignFlowStepFactory("HLS"))
1900  ->CreateHLSFlowStep(
1901  parameters->getOption<HLSFlowStep_Type>(OPT_register_allocation_algorithm), funId);
1902  }
1903  regb->Initialize();
1904  regb->Exec();
1905  }
1906  }
1907 
1909  {
1910  START_TIME(clique_iteration_cputime);
1911  }
1912  for(const auto& partition : partitions)
1913  {
1914  THROW_ASSERT(partition.second.size() > 1, "bad projection");
1915  auto vert_it_end = partition.second.end();
1916  const double mux_time =
1917  MODULE_BINDING_MUX_MARGIN * allocation_information->estimate_mux_time(partition.first);
1918  double controller_delay = allocation_information->EstimateControllerDelay();
1919  double resource_area = allocation_information->compute_normalized_area(partition.first);
1920  auto fu_prec = allocation_information->get_prec(partition.first);
1921 
1923  "---controller_delay: " + STR(controller_delay) +
1924  " resource normalized area=" + STR(resource_area));
1926  "---mux_time: " + STR(mux_time) +
1927  " area_mux=" + STR(allocation_information->estimate_mux_area(partition.first)));
1928 
1929  const auto disabling_slack_cond0 =
1930  ((allocation_information->get_number_channels(partition.first) >= 1) and
1931  (!allocation_information->is_readonly_memory_unit(partition.first) ||
1932  (!parameters->isOption(OPT_rom_duplication) || !parameters->getOption<bool>(OPT_rom_duplication))));
1933  const auto clique_covering_method = [&]() {
1934  if(disabling_slack_cond0)
1935  {
1937  "DISABLING STD clique covering algorithm. Forced to BIPARTITE_MATCHING");
1939  }
1940  return GetPointer<const CDFCModuleBindingSpecialization>(hls_flow_step_specialization)
1941  ->clique_covering_algorithm;
1942  }();
1943 
1944  const auto res_name = allocation_information->get_fu_name(partition.first).first;
1945  const auto lib_name = HLS->HLS_D->get_technology_manager()->get_library(res_name);
1946  const auto disabling_slack_based_binding =
1947  disabling_slack_cond0 || lib_name == WORK_LIBRARY || lib_name == PROXY_LIBRARY ||
1948  allocation_information->get_number_fu(partition.first) != INFINITE_UINT;
1949  THROW_ASSERT(lib_name != PROXY_LIBRARY || 1 == allocation_information->get_number_fu(partition.first),
1950  "unexpected condition");
1951 
1953  auto module_clique = clique_covering<vertex>::create_solver(clique_covering_method,
1954  static_cast<unsigned>(partition.second.size()));
1956  for(const auto v : partition.second)
1957  {
1958  const auto el1_name = GET_NAME(sdg, c2s[boost::get(boost::vertex_index, *CG, v)]) + "(" +
1959  sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, *CG, v)])->GetOperation() +
1960  ")";
1961  module_clique->add_vertex(c2s[boost::get(boost::vertex_index, *CG, v)], el1_name);
1962  }
1963 
1964  if(clique_covering_method == CliqueCovering_Algorithm::BIPARTITE_MATCHING)
1965  {
1967  size_t max_id = 0, curr_id;
1968  for(const auto v : partition.second)
1969  {
1970  const auto& running_states =
1971  HLS->Rliv->get_state_where_run(c2s[boost::get(boost::vertex_index, *CG, v)]);
1972  for(const auto state : running_states)
1973  {
1974  const auto v2id_it = v2id.find(state);
1975  if(v2id_it == v2id.end())
1976  {
1977  curr_id = max_id;
1978  v2id[state] = max_id;
1979  ++max_id;
1980  }
1981  else
1982  {
1983  curr_id = v2id_it->second;
1984  }
1985  module_clique->add_subpartitions(curr_id, c2s[boost::get(boost::vertex_index, *CG, v)]);
1986  }
1987  }
1988  }
1989  double local_mux_time =
1990  (disabling_slack_based_binding ? -std::numeric_limits<double>::infinity() : mux_time);
1991  const auto cond1 = compute_condition1(lib_name, allocation_information, local_mux_time, partition.first);
1992  const auto cond2 = compute_condition2(cond1, fu_prec, resource_area, small_normalized_resource_area);
1993 
1995  cdfc_edge_iterator cg_ei, cg_ei_end;
1996  const cdfc_graphConstRef CG_subgraph(new cdfc_graph(
1997  *cdfc_bulk_graph, cdfc_graph_edge_selector<boost_cdfc_graph>(COMPATIBILITY_EDGE, &*cdfc_bulk_graph),
1998  cdfc_graph_vertex_selector<boost_cdfc_graph>(&partition.second)));
1999  for(boost::tie(cg_ei, cg_ei_end) = boost::edges(*CG_subgraph); cg_ei != cg_ei_end; ++cg_ei)
2000  {
2001  vertex src = c2s[boost::get(boost::vertex_index, *CG_subgraph, boost::source(*cg_ei, *CG_subgraph))];
2002  vertex tgt = c2s[boost::get(boost::vertex_index, *CG_subgraph, boost::target(*cg_ei, *CG_subgraph))];
2003 #if HAVE_UNORDERED
2004  if(src > tgt)
2005  {
2006 #else
2007  if(GET_NAME(dfg, src) > GET_NAME(dfg, tgt))
2008  {
2009 #endif
2010  continue;
2011  }
2012  _w = weight_computation(cond1, cond2, src, tgt, local_mux_time, dfg, fu, slack_time, starting_time,
2013 #ifdef HC_APPROACH
2014  hc,
2015 #endif
2016  con_rel, controller_delay, fu_prec);
2017  if(_w > 0)
2018  {
2019  module_clique->add_edge(src, tgt, _w);
2020  }
2021  else
2022  {
2023  THROW_ASSERT(!disabling_slack_based_binding, "unexpected condition");
2024  }
2025  }
2026  if(parameters->getOption<bool>(OPT_print_dot))
2027  {
2028  const auto output_directory =
2029  parameters->getOption<std::string>(OPT_dot_directory) + "/" + functionName + "/";
2030  if(!std::filesystem::exists(output_directory))
2031  {
2032  std::filesystem::create_directories(output_directory);
2033  }
2034  const auto file_name =
2035  output_directory + "MB_" + allocation_information->get_string_name(partition.first) + ".dot";
2036  module_clique->writeDot(file_name);
2037  }
2038 
2039  if(allocation_information->get_number_fu(partition.first) != INFINITE_UINT)
2040  {
2041  THROW_ASSERT(allocation_information->get_number_channels(partition.first) == 0 ||
2042  allocation_information->get_number_channels(partition.first) ==
2043  allocation_information->get_number_fu(partition.first),
2044  "unexpected condition");
2045  PRINT_DBG_MEX(
2047  "Defining resource constraints for : " + allocation_information->get_string_name(partition.first) +
2048  " to " + STR(allocation_information->get_number_fu(partition.first)));
2049  module_clique->suggest_min_resources(allocation_information->get_number_channels(partition.first));
2050  if(allocation_information->get_number_channels(partition.first) > 0)
2051  {
2052  module_clique->max_resources(allocation_information->get_number_channels(partition.first));
2053  }
2054  }
2055 
2059  unsigned var = allocation_information->is_direct_access_memory_unit(partition.first) ?
2060  (allocation_information->is_memory_unit(partition.first) ?
2061  allocation_information->get_memory_var(partition.first) :
2062  allocation_information->get_proxy_memory_var(partition.first)) :
2063  0;
2064  if(var && !HLSMgr->Rmem->is_private_memory(var))
2065  {
2066  module_clique->min_resources(allocation_information->get_number_channels(partition.first));
2067  }
2068 
2070  "Starting clique covering on a graph with " + STR(partition.second.size()) +
2071  " vertices for " + allocation_information->get_string_name(partition.first));
2072 
2074  if(disabling_slack_based_binding)
2075  {
2076  PRINT_DBG_MEX(DEBUG_LEVEL_VERBOSE, debug_level, "Disabled slack based clique covering for: " + res_name);
2077  {
2079  module_clique->exec(no_filter_clique<vertex>(), cq);
2080  }
2082  "Number of cliques covering the graph: " + STR(module_clique->num_vertices()) + " for " +
2083  allocation_information->get_string_name(partition.first));
2084  if(module_clique->num_vertices() == 0 ||
2085  (allocation_information->get_number_channels(partition.first) >= 1 &&
2086  module_clique->num_vertices() > allocation_information->get_number_channels(partition.first)))
2087  {
2088  if(disabling_slack_cond0)
2089  {
2090  PRINT_DBG_MEX(DEBUG_LEVEL_VERBOSE, debug_level, "Restarting with WEIGHTED_COLORING: " + res_name);
2091  }
2092  else
2093  {
2094  PRINT_DBG_MEX(DEBUG_LEVEL_VERBOSE, debug_level, "Restarting with BIPARTITE_MATCHING: " + res_name);
2095  }
2097  (disabling_slack_cond0 ? CliqueCovering_Algorithm::WEIGHTED_COLORING :
2099  static_cast<unsigned>(partition.second.size()));
2100  for(auto vert_it = partition.second.begin(); vert_it != vert_it_end; ++vert_it)
2101  {
2102  const auto el1_name =
2103  GET_NAME(sdg, c2s[boost::get(boost::vertex_index, *CG, *vert_it)]) + "(" +
2104  sdg->CGetOpNodeInfo(c2s[boost::get(boost::vertex_index, *CG, *vert_it)])->GetOperation() + ")";
2105  module_clique->add_vertex(c2s[boost::get(boost::vertex_index, *CG, *vert_it)], el1_name);
2106  }
2107  {
2109  size_t max_id = 0, curr_id;
2110  for(const auto v : partition.second)
2111  {
2112  const CustomOrderedSet<vertex>& running_states =
2113  HLS->Rliv->get_state_where_run(c2s[boost::get(boost::vertex_index, *CG, v)]);
2114  for(const auto state : running_states)
2115  {
2116  const auto v2di_it = v2id.find(state);
2117  if(v2di_it == v2id.end())
2118  {
2119  curr_id = max_id;
2120  v2id[state] = max_id;
2121  ++max_id;
2122  }
2123  else
2124  {
2125  curr_id = v2di_it->second;
2126  }
2127  module_clique->add_subpartitions(curr_id, c2s[boost::get(boost::vertex_index, *CG, v)]);
2128  }
2129  }
2130  }
2131  const cdfc_graphConstRef CG_subgraph0(
2132  new cdfc_graph(*cdfc_bulk_graph,
2134  cdfc_graph_vertex_selector<boost_cdfc_graph>(&partition.second)));
2135  for(boost::tie(cg_ei, cg_ei_end) = boost::edges(*CG_subgraph0); cg_ei != cg_ei_end; ++cg_ei)
2136  {
2137  const auto src =
2138  c2s[boost::get(boost::vertex_index, *CG_subgraph0, boost::source(*cg_ei, *CG_subgraph0))];
2139  const auto tgt =
2140  c2s[boost::get(boost::vertex_index, *CG_subgraph0, boost::target(*cg_ei, *CG_subgraph0))];
2141 #if HAVE_UNORDERED
2142  if(src > tgt)
2143 #else
2144  if(GET_NAME(dfg, src) > GET_NAME(dfg, tgt))
2145 #endif
2146  {
2147  continue;
2148  }
2149  _w = weight_computation(cond1, cond2, src, tgt, local_mux_time, dfg, fu, slack_time, starting_time,
2150 #ifdef HC_APPROACH
2151  hc,
2152 #endif
2153  con_rel, controller_delay, fu_prec);
2154  if(_w > 0)
2155  {
2156  module_clique->add_edge(src, tgt, _w);
2157  }
2158  else
2159  {
2160  THROW_ERROR("unexpected condition");
2161  }
2162  }
2163  if(allocation_information->get_number_fu(partition.first) != INFINITE_UINT)
2164  {
2165  THROW_ASSERT(allocation_information->get_number_channels(partition.first) == 0 ||
2166  allocation_information->get_number_channels(partition.first) ==
2167  allocation_information->get_number_fu(partition.first),
2168  "unexpected condition");
2169 
2170  module_clique->suggest_min_resources(allocation_information->get_number_channels(partition.first));
2171  if(allocation_information->get_number_channels(partition.first) > 0)
2172  {
2173  module_clique->max_resources(allocation_information->get_number_channels(partition.first));
2174  }
2175  }
2176 
2180  if(var && !HLSMgr->Rmem->is_private_memory(var))
2181  {
2182  module_clique->min_resources(allocation_information->get_number_channels(partition.first));
2183  }
2184  {
2186  module_clique->exec(no_filter_clique<vertex>(), cq);
2187  }
2188  if(allocation_information->get_number_fu(partition.first) != INFINITE_UINT)
2189  {
2190  THROW_ASSERT(allocation_information->get_number_channels(partition.first) == 0 ||
2191  allocation_information->get_number_channels(partition.first) ==
2192  allocation_information->get_number_fu(partition.first),
2193  "unexpected condition");
2194  if(allocation_information->get_number_channels(partition.first) > 0 &&
2195  module_clique->num_vertices() > allocation_information->get_number_channels(partition.first) &&
2196  !allocation_information->is_readonly_memory_unit(partition.first))
2197  {
2198  THROW_ERROR("Something of wrong happen: no feasible solution exist for module binding: " +
2199  res_name + "[" + STR(module_clique->num_vertices()) + "]");
2200  }
2201  }
2202  }
2203  }
2204  else
2205  {
2206  const auto area_resource = allocation_information->get_area(partition.first) +
2207  100 * allocation_information->get_DSPs(partition.first);
2209  module_binding_check<vertex> cq(fu_prec, area_resource, HLS, HLSMgr, slack_time, starting_time,
2210  controller_delay, mrbs);
2211  module_clique->exec(slack_based_filtering(slack_time, starting_time, controller_delay, fu_prec, HLS,
2212  HLSMgr, area_resource, con_rel),
2213  cq);
2214  }
2216  "Number of cliques covering the graph: " + STR(module_clique->num_vertices()) + " for " +
2217  allocation_information->get_string_name(partition.first));
2218  total_modules_allocated += module_clique->num_vertices();
2219  to_update.clear();
2220 
2221  unsigned int Tot_mux = 0;
2222 
2224  unsigned int delta_nclique = 0;
2225  for(unsigned int i = 0; i < module_clique->num_vertices(); ++i)
2226  {
2227  const auto clique_temp = module_clique->get_clique(i);
2228  if(clique_temp.empty())
2229  {
2230  continue;
2231  }
2232 
2233 #if HAVE_UNORDERED
2234  const auto clique = clique_temp;
2235 #else
2236  OpVertexSet clique(sdg);
2237  clique.insert(clique_temp.begin(), clique_temp.end());
2238 #endif
2239 
2240  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Analyzing clique");
2241  fu_unit = fu->get_assign(*(clique.begin()));
2242  THROW_ASSERT(fu_unit == partition.first, "unexpected case");
2243  unsigned int num = 0;
2244  if(numModule.find(fu_unit) == numModule.end())
2245  {
2246  numModule[fu_unit] = 1;
2247  }
2248  else
2249  {
2250  num = numModule[fu_unit]++;
2251  }
2252  total_resource_area += allocation_information->get_area(fu_unit);
2253  total_DSPs += allocation_information->get_DSPs(fu_unit);
2254 
2255  unsigned int total_muxes;
2256  unsigned int n_shared;
2257 
2258  double mux_area_estimation, mux_time_estimation;
2259  estimate_muxes<true, false>(con_rel, fu_prec, mux_time_estimation, mux_area_estimation, clique,
2260  total_muxes, n_shared, identity_converter, HLSMgr, HLS, debug_level);
2261  Tot_mux += total_muxes;
2262  total_area_muxes += mux_area_estimation;
2264  "---estimate area muxes=" + STR(mux_area_estimation));
2266  "---estimate delay muxes=" + STR(mux_time_estimation));
2267 
2269  "---Sharing degree for " + allocation_information->get_string_name(fu_unit) + "_" +
2270  STR(num) + " = " + STR(clique.size()));
2272  double max_starting_time = 0.0;
2273  for(const auto current_vert : clique)
2274  {
2275  max_starting_time = std::max(max_starting_time, starting_time[current_vert]);
2276  }
2277  if(max_starting_time < controller_delay && total_muxes > 0)
2278  {
2279  max_starting_time = controller_delay;
2280  }
2281  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---max_starting_time=" + STR(max_starting_time));
2282  bool first_vertex = true;
2283  bool first_vertex_has_negative_slack = false;
2284 
2285  for(const auto current_vert : clique)
2286  {
2287  const auto node_id = sdg->CGetOpNodeInfo(current_vert)->GetNodeId();
2288 
2289  if(!first_vertex && (!disabling_slack_based_binding &&
2290  ((slack_time[current_vert] - (max_starting_time - starting_time[current_vert]) -
2291  mux_time_estimation) < 0 ||
2292  first_vertex_has_negative_slack) &&
2293  clique.size() > 1))
2294  {
2296  " negative slack: solution is not feasible");
2297  fu->bind(current_vert, fu_unit, numModule[fu_unit]);
2298  if(node_id)
2299  {
2301  "---" + GET_NAME(sdg, current_vert) + "(" +
2302  TreeM->get_tree_node_const(node_id)->ToString() + ") bound to " +
2303  allocation_information->get_string_name(fu_unit) + "(" +
2304  STR(numModule[fu_unit]) + ")");
2305  }
2306  numModule[fu_unit]++;
2307  total_resource_area += allocation_information->get_area(fu_unit);
2308  total_DSPs += allocation_information->get_DSPs(fu_unit);
2309  total_modules_allocated++;
2310  delta_nclique++;
2311  continue;
2312  }
2313  else
2314  {
2315  first_vertex = false;
2317  fu->bind(current_vert, fu_unit, num);
2318  if(node_id)
2319  {
2321  "---" + GET_NAME(sdg, current_vert) + "(" +
2322  TreeM->get_tree_node_const(node_id)->ToString() + ") bound to " +
2323  allocation_information->get_string_name(fu_unit) + "(" + STR(num) + ")");
2324  }
2325  }
2326  slack_time[current_vert] = slack_time[current_vert] -
2327  (max_starting_time - starting_time[current_vert]) - mux_time_estimation;
2328  if(slack_time[current_vert] < 0)
2329  {
2330  first_vertex_has_negative_slack = true;
2331  }
2332  starting_time[current_vert] = max_starting_time;
2333  to_update.insert(current_vert);
2334  update_slack_starting_time(fdfg, to_update, slack_time, starting_time, false, true, false);
2335  /*
2336  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Updated Starting time ***
2337  Latest ending time *** Slacks"); #ifndef NDEBUG for(const auto operation : sdg->CGetOperations())
2338  {
2339  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---" + GET_NAME(sdg,
2340  operation) + " *** starting_time=" + STR(starting_time.find(operation)->second) + " ***
2341  latest_ending_time=" + STR(ending_time.find(operation)->second) + " *** slack_time="+
2342  STR(slack_time.find(operation)->second));
2343  }
2344  #endif
2345  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--");
2346  */
2347  to_update.insert(current_vert);
2348  update_slack_starting_time(fdfg, to_update, slack_time, starting_time, true, false, true);
2349  /* INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "-->Updated Starting time
2350  *** Latest ending time *** Slacks"); #ifndef NDEBUG for(const auto operation : sdg->CGetOperations())
2351  {
2352  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "---" + GET_NAME(sdg,
2353  operation) + " *** starting_time=" + STR(starting_time.find(operation)->second) + " ***
2354  latest_ending_time=" + STR(ending_time.find(operation)->second) + " *** slack_time="+
2355  STR(slack_time.find(operation)->second));
2356  }
2357  #endif
2358  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--");
2359  */
2360  }
2361  INDENT_DBG_MEX(DEBUG_LEVEL_VERY_PEDANTIC, debug_level, "<--Analyzed clique");
2362  }
2363 
2365  "---cdfc mux estimation " + STR(Tot_mux) + " -- Number of cliques covering the graph: " +
2366  STR(module_clique->num_vertices() + delta_nclique) + " " + functionName + "_" +
2367  allocation_information->get_string_name(partition.first) + " with " +
2368  STR(partition.second.size()) + " vertices");
2369  }
2370  if(iteration == 0 || total_area_best > total_area_muxes + total_resource_area)
2371  {
2372  fu_best = fu;
2373  total_area_best = total_area_muxes + total_resource_area;
2374  total_modules_allocated_best = total_modules_allocated;
2375  total_resource_area_best = total_resource_area;
2376  total_area_muxes_best = total_area_muxes;
2377  total_DSPs_best = total_DSPs;
2378  }
2380  {
2381  STOP_TIME(clique_iteration_cputime);
2383  "---Iteration " + STR(iteration) + " completed in " +
2384  print_cpu_time(clique_iteration_cputime) + " seconds");
2386  {
2388  "---total_resource_area=" + STR(total_resource_area) + ", total_DSPs=" + STR(total_DSPs) +
2389  ", total_area_muxes=" + STR(total_area_muxes));
2390  }
2391  }
2392  }
2393  std::swap(fu_best, fu);
2394  std::swap(total_modules_allocated_best, total_modules_allocated);
2395  std::swap(total_resource_area_best, total_resource_area);
2396  std::swap(total_area_muxes_best, total_area_muxes);
2397  std::swap(total_DSPs_best, total_DSPs);
2399  {
2400  STOP_TIME(clique_cputime);
2402  "---Clique covering computation completed in " + print_cpu_time(clique_cputime) + " seconds");
2403  }
2404  }
2405 
2407  {
2409  }
2411  "-->Module binding information for function " +
2412  HLSMgr->CGetFunctionBehavior(funId)->CGetBehavioralHelper()->get_function_name() + ":");
2414  "---Number of modules instantiated: " + STR(total_modules_allocated));
2416  "---Number of performance conflicts: " + STR(n_performance_conflicts));
2418  "---Estimated resources area (no Muxes and address logic): " + STR(total_resource_area));
2419  INDENT_OUT_MEX(OUTPUT_LEVEL_MINIMUM, output_level, "---Estimated area of MUX21: " + STR(total_area_muxes));
2421  "---Total estimated area: " + STR(total_area_muxes + total_resource_area));
2422  INDENT_OUT_MEX(OUTPUT_LEVEL_MINIMUM, output_level, "---Estimated number of DSPs: " + STR(total_DSPs));
2424  {
2425  STOP_TIME(step_time);
2427  "Time to perform module binding: " + print_cpu_time(step_time) + " seconds");
2428  }
2431  {
2433  }
2435 }
2436 
2438  const cdfc_graphConstRef& cg, std::deque<cdfc_edge>& candidate_edges)
2439 {
2440  std::vector<bool> visited(boost::num_vertices(*cdfc), false);
2441  std::vector<bool> cg_visited(boost::num_vertices(*cg), false);
2442  std::vector<bool> cdfc_visited(boost::num_vertices(*cg), false);
2443  cg_visited[boost::get(boost::vertex_index, *cg, start)] = true;
2444  cdfc_out_edge_iterator oe_cg, oe_end_cg;
2445  for(boost::tie(oe_cg, oe_end_cg) = boost::out_edges(start, *cg); oe_cg != oe_end_cg; ++oe_cg)
2446  {
2447  cdfc_vertex tgt = boost::target(*oe_cg, *cg);
2448  visited[boost::get(boost::vertex_index, *cdfc, tgt)] = true;
2449  if(false_loop_search_cdfc_1(tgt, 1, k, start, cdfc, cg, candidate_edges, visited, cg_visited, cdfc_visited))
2450  {
2451  candidate_edges.push_front(*oe_cg);
2452  return true;
2453  }
2454  visited[boost::get(boost::vertex_index, *cdfc, tgt)] = false;
2455  }
2456  return false;
2457 }
2458 
2460  const cdfc_graphConstRef& cdfc, const cdfc_graphConstRef& cg,
2461  std::deque<cdfc_edge>& candidate_edges, std::vector<bool>& visited,
2462  std::vector<bool>& cg_visited, std::vector<bool>& cdfc_visited)
2463 {
2464  cdfc_out_edge_iterator oe_cdfc, oe_end_cdfc;
2465  if(level > k)
2466  {
2467  return false;
2468  }
2469  if(cdfc_visited[boost::get(boost::vertex_index, *cdfc, src)])
2470  {
2471  return false;
2472  }
2473  cdfc_visited[boost::get(boost::vertex_index, *cdfc, src)] = true;
2474  for(boost::tie(oe_cdfc, oe_end_cdfc) = boost::out_edges(src, *cdfc); oe_cdfc != oe_end_cdfc; ++oe_cdfc)
2475  {
2476  cdfc_vertex tgt = boost::target(*oe_cdfc, *cdfc);
2477  if(!visited[boost::get(boost::vertex_index, *cdfc, tgt)])
2478  {
2479  visited[boost::get(boost::vertex_index, *cdfc, tgt)] = true;
2480  bool is_cg_edge;
2481  cdfc_edge cg_e;
2482  boost::tie(cg_e, is_cg_edge) = boost::edge(src, tgt, *cg);
2483  if(!is_cg_edge && false_loop_search_cdfc_more(tgt, level, k, start, cdfc, cg, candidate_edges, visited,
2484  cg_visited, cdfc_visited))
2485  {
2486  return true;
2487  }
2488  visited[boost::get(boost::vertex_index, *cdfc, tgt)] = false;
2489  }
2490  }
2491  return false;
2492 }
2493 
2495  cdfc_vertex start, const cdfc_graphConstRef& cdfc,
2496  const cdfc_graphConstRef& cg,
2497  std::deque<cdfc_edge>& candidate_edges,
2498  std::vector<bool>& visited, std::vector<bool>& cg_visited,
2499  std::vector<bool>& cdfc_visited)
2500 {
2501  cdfc_out_edge_iterator oe_cdfc, oe_end_cdfc;
2502  if(start == src)
2503  {
2504  return true;
2505  }
2506  if(cg_visited[boost::get(boost::vertex_index, *cg, src)])
2507  {
2508  return false;
2509  }
2510  cg_visited[boost::get(boost::vertex_index, *cg, src)] = true;
2511 
2512  for(boost::tie(oe_cdfc, oe_end_cdfc) = boost::out_edges(src, *cdfc); oe_cdfc != oe_end_cdfc; ++oe_cdfc)
2513  {
2514  cdfc_vertex tgt = boost::target(*oe_cdfc, *cdfc);
2515  if(!visited[boost::get(boost::vertex_index, *cdfc, tgt)])
2516  {
2517  visited[boost::get(boost::vertex_index, *cdfc, tgt)] = true;
2518  cdfc_edge cg_e = *oe_cdfc;
2519  if((*cdfc)[cg_e].selector & COMPATIBILITY_EDGE)
2520  {
2521  if(false_loop_search_cdfc_1(tgt, level + 1, k, start, cdfc, cg, candidate_edges, visited, cg_visited,
2522  cdfc_visited))
2523  {
2524  candidate_edges.push_front(cg_e);
2525  return true;
2526  }
2527  }
2528  else
2529  {
2530  if(false_loop_search_cdfc_more(tgt, level, k, start, cdfc, cg, candidate_edges, visited, cg_visited,
2531  cdfc_visited))
2532  {
2533  return true;
2534  }
2535  }
2536  visited[boost::get(boost::vertex_index, *cdfc, tgt)] = false;
2537  }
2538  }
2539  return false;
2540 }
2541 
2543  const CustomUnorderedMap<vertex, double>& slack_time, const double mux_time)
2544 {
2545  const AllocationInformationConstRef allocation_information = HLS->allocation_information;
2546  if(can_be_clustered_table.find(v) != can_be_clustered_table.end())
2547  {
2548  return can_be_clustered_table.find(v)->second;
2549  }
2552  {
2553  can_be_clustered_table[v] = false;
2554  return false;
2555  }
2556  if(slack_time.find(v)->second < 2 * mux_time)
2557  {
2558  can_be_clustered_table[v] = false;
2559  return false;
2560  }
2561  auto fu_s1 = fu->get_assign(v);
2562  /*
2563  HLS->Rliv->set_HLS(HLS);
2564  std::string res_name = HLS->allocation_information->get_fu_name(fu_s1).first;
2565  std::string lib_name = HLS->HLS_D->get_technology_manager()->get_library(res_name);
2566  bool disabling_slack_based_binding = (HLS->allocation_information->get_number_channels(fu_s1) >= 1) ||
2567  lib_name == WORK_LIBRARY || lib_name == PROXY_LIBRARY ||
2568  (HLS->allocation_information->get_number_fu(fu_s1) != INFINITE_UINT &&
2569  !HLS->allocation_information->is_indirect_access_memory_unit(fu_s1)); if(disabling_slack_based_binding)
2570  {
2571  can_be_clustered_table[v] = false;
2572  return false;
2573  }
2574  if(GET_TYPE(fsdg, v) & (TYPE_ENTRY|TYPE_EXIT)) return false;
2575  if((GET_TYPE(fsdg, v) & (TYPE_LOAD | TYPE_STORE)) ||
2576  fsdg->CGetOpNodeInfo(v)->GetOperation() == STORE ||
2577  fsdg->CGetOpNodeInfo(v)->GetOperation() == MEMSET ||
2578  fsdg->CGetOpNodeInfo(v)->GetOperation() == MEMCPY ||
2579  fsdg->CGetOpNodeInfo(v)->GetOperation() == MEMCMP)
2580  {
2581  can_be_clustered_table[v] = false;
2582  return false;
2583  }
2584 
2585  OutEdgeIterator oe_fsdg, oe_end_fsdg;
2586  bool is_first=true;
2587  bool conflict_p = false;
2588  for(tie(oe_fsdg, oe_end_fsdg) = boost::out_edges(v, *fsdg); oe_fsdg != oe_end_fsdg; ++oe_fsdg)
2589  {
2590  if(GET_TYPE(fsdg, boost::target(*oe_fsdg, *fsdg)) &
2591  (TYPE_ENTRY|TYPE_EXIT|TYPE_PHI|TYPE_VPHI|TYPE_GOTO|TYPE_LABEL|TYPE_RET|TYPE_SWITCH|TYPE_MULTIIF|TYPE_IF|TYPE_EXTERNAL))
2592  continue; if(is_first)
2593  {
2594  conflict_p = HLS->Rliv->are_in_conflict(v, boost::target(*oe_fsdg, *fsdg));
2595  is_first = false;
2596  }
2597  else
2598  {
2599  bool curr_conflict_p = HLS->Rliv->are_in_conflict(v, boost::target(*oe_fsdg, *fsdg));
2600  if(curr_conflict_p != conflict_p)
2601  {
2602  can_be_clustered_table[v] = false;
2603  return false;
2604  }
2605  }
2606  }
2607 */
2608 
2609  std::vector<HLS_manager::io_binding_type> vars_read1 = HLSMgr->get_required_values(HLS->functionId, v);
2610  if(vars_read1.size() > 1)
2611  {
2612  double resource_area = allocation_information->compute_normalized_area(fu_s1);
2613  double exec_time = allocation_information->get_worst_execution_time(fu_s1) -
2614  allocation_information->get_correction_time(
2615  fu_s1, fsdg->CGetOpNodeInfo(v)->GetOperation(),
2616  static_cast<unsigned>(fsdg->CGetOpNodeInfo(v)
2619  .size()));
2620  // double stage_time = allocation_information->get_worst_stage_period(fu_s1);
2621  // if(exec_time == 0.0 && stage_time == 0.0) return true;
2622  if(exec_time < 1.0 || resource_area < 0.5)
2623  {
2624  can_be_clustered_table[v] = false;
2625  return false;
2626  }
2627  }
2628  can_be_clustered_table[v] = true;
2629  return true;
2630 }
2631 
2632 int cdfc_module_binding::weight_computation(bool cond1, bool cond2, vertex v1, vertex v2, const double mux_time,
2633  const OpGraphConstRef
2634 #ifndef NDEBUG
2635  fsdg
2636 #endif
2637  ,
2639 #ifdef HC_APPROACH
2640  fu
2641 #endif
2642  ,
2643  const CustomUnorderedMap<vertex, double>& slack_time,
2644  CustomUnorderedMap<vertex, double>& starting_time,
2645 #ifdef HC_APPROACH
2646  spec_hierarchical_clustering& hc,
2647 #endif
2648  connection_relation& con_rel, double controller_delay,
2649  unsigned long long prec)
2650 {
2652  "-->Weight computation of " + GET_NAME(fsdg, v1) + "-->" + GET_NAME(fsdg, v2));
2653  size_t _w = 1;
2654  size_t threshold1, threshold2;
2655  size_t in1 = con_rel.find(v1)->second.size();
2656  size_t in2 = con_rel.find(v2)->second.size();
2657  unsigned int total_muxes = 0;
2658  if(in1 == in2)
2659  {
2660  // std::cerr << "same number of operand" << std::endl;
2661  size_t n_inputs = in1;
2662  threshold1 = 2 * n_inputs;
2664  std::vector<vertex> cluster(2);
2665  converter[v1] = v1;
2666  cluster[0] = v1;
2667  converter[v2] = v2;
2668  cluster[1] = v2;
2669  unsigned int n_shared;
2670  double mux_area_estimation, mux_time_estimation;
2671  estimate_muxes<false, false>(con_rel, prec, mux_time_estimation, mux_area_estimation, cluster, total_muxes,
2672  n_shared, converter, HLSMgr, HLS, debug_level);
2674  "---total_muxes=" + STR(total_muxes) + " n_shared=" + STR(n_shared) +
2675  " n_inputs=" + STR(n_inputs));
2676  // std::cerr << "total_muxes " << total_muxes << " n_shared " << n_shared << " n_inputs " << n_inputs << " " <<
2677  // GET_OP(fsdg, v1) + "-" + GET_OP(fsdg, v2) << std::endl;
2678 
2679  if(total_muxes > n_inputs)
2680  {
2681  _w = n_inputs;
2682  }
2683  else
2684  {
2685  _w = 1 + n_inputs + n_inputs - total_muxes;
2686  }
2687  _w += n_shared;
2688  threshold2 = threshold1 + n_shared;
2689  }
2690  else
2691  {
2692  threshold1 = 2 * std::max(in1, in2);
2693  threshold2 = threshold1;
2694  }
2695  // std::cerr << "_w=" << _w << std::endl;
2696 
2697  double max_starting_time = std::max(starting_time.find(v1)->second, starting_time.find(v2)->second);
2698  if(max_starting_time < controller_delay && total_muxes > 0)
2699  {
2700  max_starting_time = controller_delay;
2701  }
2702  double v1_slack, v2_slack;
2703  v1_slack = slack_time.find(v1)->second - (max_starting_time - starting_time.find(v1)->second);
2704  v2_slack = slack_time.find(v2)->second - (max_starting_time - starting_time.find(v2)->second);
2705 
2707  "---cond1=" + (cond1 ? std::string("T") : std::string("F")) +
2708  " cond2=" + (cond2 ? std::string("T") : std::string("F")));
2710  "---Weight before=" + STR(_w) + " v1_slack=" + STR(v1_slack) + " v2_slack=" + STR(v2_slack) +
2711  " threshold=" + STR(threshold1) + " resource_type=" + " mux_time=" + STR(mux_time) +
2712  " prec=" + STR(prec));
2713 
2714  if(cond1 && _w <= threshold1 && ((v1_slack < mux_time) || (v2_slack < mux_time)))
2715  {
2716  _w = 0;
2717 #ifdef HC_APPROACH
2718  int _w_saved = _w;
2719  if(can_be_clustered(v1, fsdg, fu, slack_time, mux_time) && can_be_clustered(v2, fsdg, fu, slack_time, mux_time))
2720  {
2721  double p_weight =
2722  hc.pair_weight(boost::get(boost::vertex_index, *fsdg, v1), boost::get(boost::vertex_index, *fsdg, v2));
2723  int delta = static_cast<int>(static_cast<double>(threshold1) * p_weight);
2724  if(p_weight >= 1.0)
2725  _w += delta;
2726  }
2727  if(_w != _w_saved)
2728  {
2729  std::cerr << "Before " << _w_saved << " " << GET_NAME(fsdg, v1) << "("
2730  << fsdg->CGetOpNodeInfo(v1)->GetOperation() << ")-"
2731  << "-" << GET_NAME(fsdg, v2) << std::endl;
2732  std::cerr << "After " << _w << std::endl;
2733  }
2734 #endif
2735  }
2736  else if((cond2 && _w <= threshold2))
2737  {
2738  _w = 0;
2739  }
2740 
2741  if(_w > 31)
2742  {
2743  _w = 31;
2744  }
2746  "<--Weight of " + GET_NAME(fsdg, v1) + "-->" + GET_NAME(fsdg, v2) + " is " + STR(_w));
2747  return static_cast<int>(_w);
2748 }
#define TYPE_SWITCH
constant identifying the node type of a SWITCH operation.
Definition: op_graph.hpp:105
double get_worst_execution_time(const unsigned int fu_name) const
Returns the worst execution time for all the operations associated with the functional unit...
#define CLOCK_MARGIN
OpEdgeSet CGetInEdges(const vertex v) const
Return the edge ingoing in a vertex.
Definition: op_graph.cpp:388
double get_area(const unsigned int fu_name) const
Returns the area for a given functional unit.
void operator()(std::ostream &out, const EdgeDescriptor &e) const override
Operator which print label of an EdgeDescriptor.
Class specification to contain liveness information.
unsigned int get_register(unsigned int sv) const
return the register index where the storage value is stored
#define DEBUG_LEVEL_VERY_PEDANTIC
extremely verbose debugging print is performed.
const HLS_managerRef HLSMgr
information about all the HLS synthesis
Definition: hls_step.hpp:205
bool is_vertex_bounded(const unsigned int fu_name) const
Checks if the functional unit is uniquely bounded to a vertex.
System dependence graph.
int GetSelector() const
Return the selector of this graph.
Definition: graph.hpp:922
Data structure representing the entire HLS information.
#define INDENT_DBG_MEX(dbgLevel, curDbgLevel, mex)
We are producing a debug version of the program, so the message is printed;.
Module binding based on the analysis of the control data flow chained graph.
class containing information about chaining
boost::graph_traits< graph >::out_edge_iterator OutEdgeIterator
out_edge_iterator definition.
Definition: graph.hpp:1312
double get_correction_time(unsigned int fu, const std::string &operation_name, unsigned int n_ins) const
return a time to be subtracted to the execution time/stage period
const OpWriter operation_writer
The functor used to print labels which correspond to vertices of the graph to be printed.
#define GET_TYPE(data, vertex_index)
Helper macro returning the type associated with a node.
CdfcEdgeWriter(const CdfcGraph *_printing_graph)
Constructor.
void back_edge(const Edge &e, Graph &g)
const OpGraphConstRef data
File containing functions and utilities to support the printing of debug messagges.
unsigned long long get_prec(const unsigned int fu_name) const
return the precision of the given functional unit
The info associated with a cdfc graph.
#define PRINT_DBG_MEX(dbgLevel, curDbgLevel, mex)
We are producing a debug version of the program, so the message is printed;.
const ParameterConstRef parameters
Set of input parameters.
Definition: graph.hpp:291
std::string ToString() const
Print this node as string in gimple format.
This file contains the structures needed to manage a graph that will represent the state transition g...
const CustomOrderedSet< vertex > & get_state_where_end(vertex op) const
return in which support vertex the operation is ending
Definition: liveness.cpp:235
#define OP_THRESHOLD
Base class storing user data information to the whole graph.
Definition: graph_info.hpp:60
bool is_direct_access_memory_unit(unsigned int fu_type) const
return true in case fu type is a memory unit with direct access channels
This header file includes four different algorithms heuristically solving the clique covering problem...
#define FB_DFG_SCA_SELECTOR
Data flow graph edge selector between computed on scalars.
Definition: op_graph.hpp:453
#define TYPE_MULTIIF
constant identifying the a multi-way if
Definition: op_graph.hpp:202
#define TYPE_VPHI
constant string identifying an operation node of type virtual phi-nodes
Definition: op_graph.hpp:162
string target
Definition: lenet_tvm.py:16
refcount< fu_binding > fu_bindingRef
RefCount type definition of the fu_binding class structure.
Definition: fu_binding.hpp:402
#define GET_CLASS(obj)
Macro returning the actual type of an object.
Dest from_strongtype_cast(Source source)
std::string get_function_name() const
Return the name of the function.
Weighted clique covering register allocation procedure.
#define TYPE_IF
constant identifying the node type of an IF operation.
Definition: op_graph.hpp:100
const CustomUnorderedMap< vertex, vertex > & c2s
const int output_level
The output level.
boost::graph_traits< cdfc_graph >::in_edge_iterator cdfc_in_edge_iterator
in_edge_iterator definition.
#define C
Definition: generate.c:15
bool get_ports_are_swapped(vertex v) const
Check if vertex v has its ports swapped or not.
Definition: fu_binding.hpp:359
static fu_bindingRef create_fu_binding(const HLS_managerConstRef _HLSMgr, const unsigned int _function_id, const ParameterConstRef _parameters)
create_fu_binding: factory method for fu_binding
Definition: fu_binding.cpp:135
Predicate functor object used to select the proper set of edges.
AbsControlStep get_cstep(const vertex &op) const
Returns the clock cycle where the given operation has been scheduled.
Definition: schedule.cpp:270
#define INDENT_OUT_MEX(outLevel, curOutLevel, mex)
double get_fo_correction(unsigned int first_operation, unsigned int second_operation) const
return the fan-out correction for a given edge
Definition: schedule.cpp:1162
absl::btree_map< T, U, Compare, Alloc > CustomOrderedMap
Definition: custom_map.hpp:160
CdfcEdgeInfo(const int edge_weight)
Constructor.
unsigned int get_assign(const vertex &v) const
Returns the functional unit assigned to the vertex.
Definition: fu_binding.cpp:242
const unsigned int funId
identifier of the function to be processed (0 means that it is a global step)
#define GET_NAME(data, vertex_index)
Helper macro returning the name associated with a node.
Class specification of the graph structures.
std::pair< std::string, std::string > get_fu_name(unsigned int id) const
Returns the name of the functional unit, associated with the name of the library, given the correspon...
const HLS_deviceRef HLS_D
reference to the information representing the target for the synthesis
Definition: hls.hpp:107
boost::graph_traits< graph >::in_edge_iterator InEdgeIterator
in_edge_iterator definition.
Definition: graph.hpp:1310
static bool is_parameter(const tree_managerConstRef &TM, const unsigned int index)
return true in case the index corresponds to a parameter in ssa form or not
double estimate_muxNto1_area(unsigned long long fu_prec, unsigned int mux_ins) const
Return the area due to mux with n-inputs.
AbsControlStep get_cstep_end(const vertex &op) const
Return the last clock cycle in which the operation execute.
Definition: schedule.cpp:302
double get_DSPs(const unsigned int fu_name) const
return an estimation of the number of DSPs used by the unit
AllocationInformationRef allocation_information
Store the technology information.
Definition: hls.hpp:115
const OpVertexSet CGetOperations() const
Return the vertices belonging to the graph.
Definition: op_graph.cpp:376
bool false_loop_search_cdfc_more(cdfc_vertex src, unsigned int level, unsigned k, cdfc_vertex start, const cdfc_graphConstRef &cdfc, const cdfc_graphConstRef &cg, std::deque< cdfc_edge > &candidate_edges, std::vector< bool > &visited, std::vector< bool > &cg_visited, std::vector< bool > &cdfc_visited)
bool is_proxy_unit(const unsigned int fu_name) const
check if a functional unit is a proxy
void initialize_connection_relation(connection_relation &con_rel, const OpVertexSet &all_candidate_vertices)
refcount< boost_cdfc_graph > boost_cdfc_graphRef
DesignFlowStep_Status InternalExec() override
Execute the step.
#define min(x, y)
double compute_normalized_area(unsigned int fu_s1) const
compute a number representing the "weight" of the functional unit
const CustomUnorderedMap< vertex, double > & starting_time
CliqueCovering_Algorithm
Defines all clique covering algorithm.
Class specification of the manager of the technology library data structures.
#define abs(x)
Definition: main.c:3
void operator()(std::ostream &out, const vertex &v) const override
Operator used to print the label of a vertex.
CustomUnorderedMap< vertex, std::vector< CustomOrderedSet< std::pair< conn_code, std::pair< unsigned int, vertex > >> >> connection_relation
put into relation an operation vertex with its sources op vertex -> vector of port index -> set of pa...
virtual bool is_a_storage_value(vertex curr_vertex, unsigned int var_index)=0
return true in case a storage value exist for the pair vertex variable
#define OUTPUT_LEVEL_MINIMUM
minimum debugging print is performed.
redefinition of map to manage ordered/unordered structures
Include a set of utilities used to manage CPU time measures.
topological_based_sorting_visitor(std::vector< vertex > &_c2s, const OpGraphConstRef _sdg, OutputIterator _iter)
const tree_nodeConstRef CGetTreeNode(const unsigned int i) const
A set of operation vertices.
Definition: op_graph.hpp:654
boost::filtered_graph< boost_cdfc_graph, cdfc_graph_edge_selector< boost_cdfc_graph >, cdfc_graph_vertex_selector< boost_cdfc_graph > > cdfc_graph
compatibility graph
unsigned int get_worst_number_of_cycles(const unsigned int fu_name) const
#define TYPE_PHI
constant string identifying an operation node of type PHI
Definition: op_graph.hpp:135
void WriteDot(const std::string &file_name, const int detail_level=0) const
Writes this graph in dot format.
bool are_in_conflict(vertex op1, vertex op2) const
states if two operations are in conflict (i.e.
Definition: liveness.cpp:257
#define STR(s)
Macro which performs a lexical_cast to a string.
Auxiliary methods for manipulating string.
const AllocationInformationConstRef allocation_information
copy of the order: control step associated with the vertices.
#define max
Definition: backprop.h:17
cdfc_module_binding(const ParameterConstRef _parameters, const HLS_managerRef HLSMgr, unsigned int funId, const DesignFlowManagerConstRef design_flow_manager, const HLSFlowStepSpecializationConstRef hls_flow_step_specialization)
This is the constructor of the class.
bool can_be_clustered(vertex v, OpGraphConstRef fsdg, const fu_bindingConstRef fu, const CustomUnorderedMap< vertex, double > &slack_time, const double mux_time)
double estimate_mux_time(unsigned int fu_name) const
estimate the delay of a mux that can be uses to mux the input of the given functional unit ...
std::string GetSignature() const override
Return the contribution to the signature of a step given by the specialization.
unsigned int get_cycles(const unsigned int fu_name, const unsigned int v) const
Return the number of cycles for given vertex and a given functional unit.
boost::graph_traits< graph >::edge_iterator EdgeIterator
edge_iterator definition.
Definition: graph.hpp:1314
const tree_nodeRef get_tree_node_const(unsigned int i) const
Return the reference to the i-th tree_node Constant version of get_tree_node.
#define TYPE_LABEL
A vertex is of type TYPE_LABEL when it is a target of a goto expression.
Definition: op_graph.hpp:151
OpEdgeSet CGetOutEdges(const vertex v) const
Return the edge outgoing from a vertex.
Definition: op_graph.cpp:407
CustomUnorderedMap< vertex, bool > can_be_clustered_table
record if a vertex has to be clustered or not
const OpNodeInfoConstRef CGetOpNodeInfo(const vertex node) const
Returns the info associated with a node.
Definition: op_graph.hpp:843
~CdfcGraphsCollection() override
Destructor.
#define TYPE_EXIT
constant identifying the node type of an exit node.
#define START_TIME(time_var)
Macro used to store the start time into time_var.
Definition: cpu_time.hpp:133
#define THROW_UNREACHABLE(str_expr)
helper function used to specify that some points should never be reached
Definition: exceptions.hpp:292
System dependence graph with feedback.
fu_bindingRef Rfu
Store the refcounted functional unit binding of the operations.
Definition: hls.hpp:121
Functor used to reduce the size of clique: the rationale of filtering is that too many sharing may cr...
#define MODULE_BINDING_MUX_MARGIN
int delay(int ritardo)
Definition: delay.c:1
Factory for hls flow step.
ScheduleRef Rsch
Store the refcounted scheduling of the operations.
Definition: hls.hpp:118
unsigned edges[4545]
Definition: graph.h:25
CdfcGraph(const CdfcGraphsCollectionRef cdfc_graphs_collection, const int selector)
Constructor.
Data structure used to store the register binding of variables.
Base class for step of design flow.
StorageValueInformationRef storage_value_information
data-structure for storage values
Definition: hls.hpp:130
unsigned int get_proxy_memory_var(const unsigned int fu_name) const
return the var associated with the proxy unit
double estimate_muxNto1_delay(unsigned long long fu_prec, unsigned int mux_ins) const
Return the delay due to mux with n-inputs.
void set_HLS(hlsRef _HLS)
Definition: liveness.hpp:362
#define TYPE_GOTO
A vertex is of type TYPE_GOTO when it is associated with a goto expression.
Definition: op_graph.hpp:157
This class specifies the characteristic of a particular operation working on a given functional unit...
absl::flat_hash_map< T, U, Hash, Eq, Alloc > CustomUnorderedMap
Definition: custom_map.hpp:148
void finish_vertex(const Vertex &u, Graph &g)
Data structure used to store the schedule of the operations.
const unsigned long long mux_prec
Functor used to write the content of a vertex to dotty file.
Definition: graph.hpp:1406
static const uint32_t k[]
Definition: sha-256.c:22
static refcount< clique_covering< VertexType > > create_solver(CliqueCovering_Algorithm solver, unsigned int nvert)
Creates a reference to desired solver.
HLSFlowStep_Type
Definition: hls_step.hpp:95
Class specification of the data structures used to manage technology information. ...
std::string GetKindText() const override
Return the string representation of this.
#define TYPE_EXTERNAL
constant identifying the node type of a EXTERNAL operation (a function call)
Definition: op_graph.hpp:95
bool operator()(const unsigned int &a, const unsigned int &b) const
functor function used to compare two resources with respect to their area
static bool compute_condition1(const std::string &lib_name, const AllocationInformationConstRef allocation_information, double local_mux_time, unsigned int fu_s1)
cdfc_resource_ordering_functor(const AllocationInformationConstRef _allocation_soluton)
Constructor.
#define index(x, y)
Definition: Keccak.c:74
void bind(const vertex &v, unsigned int unit, unsigned int index=std::numeric_limits< unsigned int >::max())
Binds an operation vertex to a functional unit.
Definition: fu_binding.cpp:173
const CliqueCovering_Algorithm clique_covering_algorithm
The cdfc module binding algorithm.
redefinition of set to manage ordered/unordered structures
#define STOP_TIME(time_var)
Macro used to store the elapsed time into time_var.
Definition: cpu_time.hpp:136
slack_based_filtering(const CustomUnorderedMap< vertex, double > &_slack_time, const CustomUnorderedMap< vertex, double > &_starting_time, double _controller_delay, unsigned long long _mux_prec, const hlsRef _HLS, const HLS_managerRef _HLSMgr, const double _area_resource, const connection_relation &_con_rel)
bool may_be_chained_ops(vertex tgt, vertex src) const
check if two operations are chained in at least one state
boost::graph_traits< graph >::vertex_descriptor vertex
vertex definition.
Definition: graph.hpp:1303
~cdfc_module_binding() override
Destructor.
const Wrefcount< const DesignFlowManager > design_flow_manager
The design flow manager.
Data structure used to store the functional-unit binding of the vertices.
#define DEFAULT_SMALL_NORMALIZED_RESOURCE_AREA
General class used to describe a graph in PandA.
Definition: graph.hpp:771
const BehavioralHelperConstRef CGetBehavioralHelper() const
Returns the helper associated with the function.
boost::graph_traits< graph >::vertex_iterator VertexIterator
vertex_iterator definition.
Definition: graph.hpp:1307
Classes specification of the tree_node data structures.
CdfcGraphsCollection(const CdfcGraphInfoRef cdfc_graph_info, const ParameterConstRef parameters)
Constructor.
const ParameterConstRef parameters
Set of input parameters.
DesignFlowStep_Status
The status of a step.
This package is used by all HLS packages to manage resource constraints and characteristics.
Cdfc graph.
This file collects some utility functions and macros.
reg_bindingRef Rreg
Store the refcounted register binding of the variables.
Definition: hls.hpp:133
~CdfcGraph() override
Destructor.
#define THROW_ERROR(str_expr)
helper function used to throw an error in a standard way
Definition: exceptions.hpp:263
Wrapper of design_flow.
void update_slack_starting_time(const OpGraphConstRef fdfg, OpVertexSet &sorted_vertices, CustomUnorderedMap< vertex, double > &slack_time, CustomUnorderedMap< vertex, double > &starting_time, bool update_starting_time, bool only_backward, bool only_forward)
Data structure definition for HLS constraints.
This package is used to define the storage value scheme adopted by the register allocation algorithms...
int result[SIZE]
Definition: adpcm.c:800
const double small_normalized_resource_area
Threshold used in sharing of functional units.
double estimate_mux_area(unsigned int fu_name) const
Estimate the area of a mux attached to a given functional unit.
livenessRef Rliv
data-structure containing the variable liveness
Definition: hls.hpp:127
This file collects some utility functions.
void TopologicalSort(std::list< boost::graph_traits< graphs_collection >::vertex_descriptor > &sorted_vertices) const
Compute the topological order of the graph.
Definition: graph.hpp:996
bool is_one_cycle_direct_access_memory_unit(unsigned int fu_type) const
return true in case the functional unit is a direct_access memory unit with a single clock latency fo...
double GetStartingTime(const unsigned int operation) const
Return the starting time of the operation.
Definition: schedule.cpp:1151
std::string print_cpu_time(long int t)
massage a long which represents a time interval in milliseconds, into a string suitable for output ...
Definition: cpu_time.hpp:110
boost::graph_traits< cdfc_graph >::edge_descriptor cdfc_edge
edge definition.
int weight_computation(bool cond1, bool cond2, vertex v1, vertex v2, const double mux_time, const OpGraphConstRef fdfg, const fu_bindingConstRef fu, const CustomUnorderedMap< vertex, double > &slack_time, CustomUnorderedMap< vertex, double > &starting_time, connection_relation &con_rel, double controller_delay, unsigned long long prec)
const std::string CliqueCovering_AlgorithmToString(const CliqueCovering_Algorithm clique_covering_algorithm)
Header include.
#define DFG_SCA_SELECTOR
Selectors used only in operation graphs; numbers continue from cdfg_edge_info.hpp.
Definition: op_graph.hpp:447
bulk graph.
Definition: graph.hpp:287
refcount< T > lock() const
Definition: refcount.hpp:212
boost::adjacency_list< boost::vecS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_index_t, std::size_t >, edge_cdfc_selector > boost_cdfc_graph
bulk compatibility graph
bool false_loop_search_cdfc_1(cdfc_vertex src, unsigned int level, unsigned k, cdfc_vertex start, const cdfc_graphConstRef &cdfc, const cdfc_graphConstRef &cg, std::deque< cdfc_edge > &candidate_edges, std::vector< bool > &visited, std::vector< bool > &cg_visited, std::vector< bool > &cdfc_visited)
bool false_loop_search(cdfc_vertex start, unsigned k, const cdfc_graphConstRef &cdfc, const cdfc_graphConstRef &cg, std::deque< cdfc_edge > &candidate_edges)
string full_name
Definition: test_panda.py:841
boost::graph_traits< cdfc_graph >::edge_iterator cdfc_edge_iterator
edge_iterator definition.
#define OUTPUT_LEVEL_PEDANTIC
verbose debugging print is performed.
CDFCModuleBindingSpecialization(const CliqueCovering_Algorithm clique_covering_algorithm)
Constructor.
const HLS_managerRef HLSMgr
Functor used to write the content of the edges to a dotty file.
Definition: graph.hpp:1441
const GraphInfoConstRef CGetGraphInfo() const
FIXME: this method should become protected and called by equivalent method in subclasses Get the grap...
Definition: graph.hpp:1062
CdfcWriter(const CdfcGraph *_printing_graph)
Constructor.
Data flow graph with feedback.
This package is used to define the storage value scheme adopted by the register allocation algorithms...
unsigned int get_number_channels(unsigned int fu_name) const
return the number of channels available for the functional unit
std::string get_string_name(unsigned int fu_name) const
Returns the name of the functional for debug purpose.
Predicate functor object used to select the proper set of vertices.
#define OUTPUT_LEVEL_VERY_PEDANTIC
verbose debugging print is performed.
const CustomUnorderedMap< vertex, double > & slack_time
Collect all structs used to write a graph in the dot format.
T * GetPointer(const refcount< U > &t)
Template function used to hide dynamic_cast The template parameter T represents a type of an object h...
Definition: refcount.hpp:237
Data structure used to store the functional-unit binding of the vertexes.
const OpGraphConstRef CGetOpGraph(FunctionBehavior::graph_type gt) const
This method returns the operation graphs.
#define WORK_LIBRARY
working library.
#define INFINITE_UINT
UNSIGNED INT representing infinite.
Definition: utility.hpp:70
unsigned int get_memory_var(const unsigned int fu_name) const
Returns the base address of the functional unit.
const CustomOrderedSet< vertex > & get_state_where_run(vertex op) const
return in which support vertex the operation is running
Definition: liveness.cpp:241
size_t get_representative_in(vertex op1) const
Return the representative vertex associated with the chained vertices set in input.
hlsRef HLS
HLS data structure of the function to be analyzed.
This file collects some hash functors.
CdfcGraphInfo(const CustomUnorderedMap< vertex, vertex > &c2s, const OpGraphConstRef operation_graph)
Constructor.
virtual unsigned int get_storage_value_index(vertex curr_vertex, unsigned int var_index)=0
Returns the index of the storage value associated with the variable in a given vertex.
Functor used to compare which of two resources has to be considered first during the binding...
Data structures used in operations graph.
Template borrowed from the ANTLR library by Terence Parr (http://www.jGuru.com - Software rights: htt...
Definition: refcount.hpp:94
int level
Definition: main.c:98
unsigned int get_number_fu(unsigned int fu_name) const
Returns the number of functional unit given its id.
this class is used to manage the command-line or XML options.
const CdfcGraphInfo * cdfc_graph_info
The info associated with the graph to be printed.
static const int COMPATIBILITY_EDGE
void estimate_muxes(const connection_relation &con_rel, unsigned long long mux_prec, double &tot_mux_delay, double &tot_mux_area, const cluster_type &cluster, unsigned int &total_muxes, unsigned int &n_shared, const CustomUnorderedMap< vertex_type, vertex > &converter, const HLS_managerRef HLSMgr, const hlsRef HLS, int debug_level)
bool is_filtering() const override
unsigned int functionId
this is the identifier of the function to be implemented
Definition: hls.hpp:87
#define TYPE_ENTRY
constant identifying the node type of an entry node.
boost::filtered_graph< boost_cc_compatibility_graph, cc_compatibility_graph_edge_selector< boost_cc_compatibility_graph >, cc_compatibility_graph_vertex_selector< boost_cc_compatibility_graph > > cc_compatibility_graph
compatibility graph
bool is_memory_unit(const unsigned int fu_name) const
Returns true if the fu_name is a memory unit.
bool is_indirect_access_memory_unit(unsigned int fu) const
return true in case fu type is a resource unit performing an indirect access to memory ...
static bool compute_condition2(bool cond1, unsigned long long fu_prec, double resource_area, const double small_normalized_resource_area)
refcount< const cdfc_graph > cdfc_graphConstRef
refcount< cdfc_graph > cdfc_graphRef
refcount version of cdfc_graph
const HLS_constraintsRef HLS_C
store the HLS constraints
Definition: hls.hpp:110
Class to manage mux-based interconnections based on the FSM controller.
int debug_level
The debug level.
double GetEndingTime(const unsigned int operation) const
Return the starting time of the operation.
Definition: schedule.cpp:1140
boost::graph_traits< cdfc_graph >::vertex_descriptor cdfc_vertex
vertex definition
#define OUTPUT_LEVEL_VERBOSE
verbose debugging print is performed.
refcount< const HLSFlowStepSpecialization > HLSFlowStepSpecializationConstRef
const refcount definition of the class
Definition: hls_step.hpp:93
#define TYPE_RET
constant string identifying an operation node of type return expr
Definition: op_graph.hpp:140
graphs_collection * collection
The graph collection.
Definition: graph.hpp:846
void topological_based_sorting(const VertexListGraph &g, std::vector< vertex > &c2s, const OpGraphConstRef sdg, OutputIterator result, const boost::bgl_named_params< P, T, R > &params)
bool select_candidate_to_remove(const CustomOrderedSet< C_vertex > &candidate_clique, C_vertex &v, const CustomUnorderedMap< C_vertex, vertex > &converter, const cc_compatibility_graph &cg) const override
#define DEBUG_LEVEL_VERBOSE
verbose debugging print is performed.
boost::graph_traits< cc_compatibility_graph >::vertex_descriptor C_vertex
cc_compatibility_graph vertex
const connection_relation & con_rel
ControlStep get_initiation_time(const unsigned int fu_name, const vertex v) const
Returns the initiation time for a given vertex and a given functional unit.
Data structure definition for high-level synthesis flow.
bool is_readonly_memory_unit(const unsigned int fu_name) const
return true in case the functional unit is a read-only memory unit
boost::graph_traits< cdfc_graph >::out_edge_iterator cdfc_out_edge_iterator
out_edge_iterator definition.
#define NULL_VERTEX
null vertex definition
Definition: graph.hpp:1305
Datastructure to represent memory information in high-level synthesis.
#define PROXY_LIBRARY
proxy library
unsigned int get_index(const vertex &v) const
Returns the index of functional unit assigned to the vertex.
Definition: fu_binding.cpp:261
Generic class managing module binding algorithms.
Class specification of the manager of the tree structures extracted from the raw file.
const HLSFlowStepSpecializationConstRef hls_flow_step_specialization
The information about specialization.
Definition: hls_step.hpp:211
HLS specialization of generic_device.
A brief description of the C++ Header File.
double EstimateControllerDelay() const
estimate the delay of the controller in straightforward mode
ChainingInformationRef chaining_information
Store the refcounted chaining info.
Definition: hls.hpp:142
size_t clique_cost(const CustomOrderedSet< C_vertex > &candidate_clique, const CustomUnorderedMap< C_vertex, vertex > &converter) const override
boost::graph_traits< graph >::edge_descriptor EdgeDescriptor
edge definition.
Definition: graph.hpp:1316
#define THROW_ASSERT(cond, str_expr)
helper function used to check an assert and if needed to throw an error in a standard way ...
Definition: exceptions.hpp:289

Generated on Mon Feb 12 2024 13:02:53 for PandA-2024.02 by doxygen 1.8.13