80 const std::pair<const CustomMap<unsigned long long, CustomUnorderedMapStable<unsigned int, double>>&,
86 if(mux_timing_db.empty() || mux_area_db.empty())
94 auto* fu_br = GetPointer<functional_unit_template>(f_unit_mux);
96 auto* op_mux = GetPointer<operation>(op_mux_node);
97 std::string temp_portsize_parameters = op_mux->portsize_parameters;
98 std::vector<unsigned int> mux_precisions;
99 mux_precisions.push_back(1);
100 mux_precisions.push_back(8);
101 mux_precisions.push_back(16);
102 mux_precisions.push_back(32);
103 mux_precisions.push_back(64);
104 std::vector<std::string> parameters_split =
SplitString(temp_portsize_parameters,
"|");
105 THROW_ASSERT(parameters_split.size() > 0,
"unexpected portsize_parameter format");
106 for(
auto module_prec : mux_precisions)
108 for(
auto& el_indx : parameters_split)
110 std::vector<std::string> parameters_pairs =
SplitString(el_indx,
":");
111 if(parameters_pairs[0] ==
"*")
113 temp_portsize_parameters = parameters_pairs[1];
116 else if(static_cast<unsigned>(std::stoul(parameters_pairs[0])) == module_prec)
118 temp_portsize_parameters = parameters_pairs[1];
123 "expected some portsize0_parameters for the the template operation");
124 std::vector<std::string> portsize_parameters =
SplitString(temp_portsize_parameters,
",");
125 for(
const auto& n_inputs : portsize_parameters)
128 allocation_information->
hls_manager->get_HLS_device()->get_technology_manager()->get_fu(
129 std::string(
MUX_N_TO_1) +
"_" +
STR(module_prec) +
"_" +
STR(module_prec) +
"_" +
STR(module_prec) +
134 const functional_unit* fu_cur = GetPointer<functional_unit>(fu_cur_obj);
139 cur_area = a_m->get_area_value();
141 auto n_inputs_value =
static_cast<unsigned>(std::stoul(n_inputs));
142 mux_area_db[module_prec][n_inputs_value] = cur_area;
144 mux_timing_db[module_prec][n_inputs_value] = fu_cur_operation->time_m->get_execution_time() *
150 #define MAX_MUX_N_INPUTS 65 151 for(
auto module_prec : mux_precisions)
153 if(mux_area_db.find(module_prec) == mux_area_db.end())
155 THROW_ASSERT(mux_timing_db.find(module_prec) == mux_timing_db.end(),
"unexpected condition");
158 unsigned int n_levels;
159 for(n_levels = 1; n_ins > (1ULL << n_levels); ++n_levels)
163 mux_area_db[module_prec][n_ins] = (n_ins - 1) * allocation_information->
mux_area_unit_raw(module_prec);
164 mux_timing_db[module_prec][n_ins] = n_levels *
172 THROW_ASSERT(mux_timing_db.find(module_prec) != mux_timing_db.end(),
"unexpected condition");
173 THROW_ASSERT(mux_area_db.find(module_prec)->second.find(2) != mux_area_db.find(module_prec)->second.end(),
174 "unexpected condition");
175 THROW_ASSERT(mux_timing_db.find(module_prec)->second.find(2) !=
176 mux_timing_db.find(module_prec)->second.end(),
177 "unexpected condition");
178 unsigned int prev_non_null = 2;
181 if(mux_area_db.find(module_prec)->second.find(n_ins) != mux_area_db.find(module_prec)->second.end())
183 if(prev_non_null + 1 != n_ins)
185 for(; prev_non_null + 1 < n_ins; ++prev_non_null)
187 mux_area_db[module_prec][prev_non_null + 1] =
188 mux_area_db.find(module_prec)->second.find(prev_non_null)->second +
189 (mux_area_db.find(module_prec)->second.find(n_ins)->second -
190 mux_area_db.find(module_prec)->second.find(prev_non_null)->second) /
191 (n_ins - prev_non_null);
192 mux_timing_db[module_prec][prev_non_null + 1] =
193 mux_timing_db.find(module_prec)->second.find(prev_non_null)->second +
194 (mux_timing_db.find(module_prec)->second.find(n_ins)->second -
195 mux_timing_db.find(module_prec)->second.find(prev_non_null)->second) /
196 (n_ins - prev_non_null);
199 prev_non_null = n_ins;
204 THROW_ASSERT(mux_area_db.find(64) != mux_area_db.end(),
"unexpected condition");
205 THROW_ASSERT(!mux_area_db.at(64).empty(),
"unexpected condition");
206 THROW_ASSERT(mux_timing_db.find(64) != mux_timing_db.end(),
"unexpected condition");
207 THROW_ASSERT(!mux_timing_db.at(64).empty(),
"unexpected condition");
208 mux_area_db[128].insert(mux_area_db.at(64).begin(), mux_area_db.at(64).end());
209 mux_timing_db[128].insert(mux_timing_db.at(64).begin(), mux_timing_db.at(64).end());
213 return std::pair<const CustomMap<unsigned long long, CustomUnorderedMapStable<unsigned int, double>>&,
218 const std::tuple<const std::vector<unsigned int>&,
const std::vector<unsigned int>&>
221 static std::vector<unsigned int>
DSP_x_db;
222 static std::vector<unsigned int>
DSP_y_db;
223 if(!(DSP_x_db.size() || DSP_y_db.size()))
226 const auto hls_d = allocation_information->
hls_manager->get_HLS_device();
227 if(hls_d->has_parameter(
"DSPs_x_sizes"))
229 THROW_ASSERT(hls_d->has_parameter(
"DSPs_y_sizes"),
"device description is not complete");
230 auto DSPs_x_sizes = hls_d->get_parameter<std::string>(
"DSPs_x_sizes");
231 auto DSPs_y_sizes = hls_d->get_parameter<std::string>(
"DSPs_y_sizes");
232 std::vector<std::string> DSPs_x_sizes_vec =
SplitString(DSPs_x_sizes,
",");
233 std::vector<std::string> DSPs_y_sizes_vec =
SplitString(DSPs_y_sizes,
",");
234 size_t n_elements = DSPs_x_sizes_vec.size();
235 DSP_x_db.resize(n_elements);
236 DSP_y_db.resize(n_elements);
239 DSP_x_db[
index] =
static_cast<unsigned>(std::stoul(DSPs_x_sizes_vec[
index]));
240 DSP_y_db[
index] =
static_cast<unsigned>(std::stoul(DSPs_y_sizes_vec[index]));
244 return std::tuple<const std::vector<unsigned int>&,
const std::vector<unsigned int>&>(
DSP_x_db,
DSP_y_db);
276 return static_cast<unsigned int>(
list_of_FU.size());
292 const auto entry_string_cst = std::string(
"Entry");
293 const auto exit_string_cst = std::string(
"Exit");
294 const auto node_operation = [&]() -> std::string {
297 return entry_string_cst;
301 return exit_string_cst;
305 const auto vtf_it =
node_id_to_fus.find(std::pair<unsigned int, std::string>(v, node_operation));
307 return vtf_it->second;
337 "This function (" +
get_string_name(fu_name) +
") cannot implement the operation " +
STR(v));
342 const auto operation_name =
346 "Timing information not specified for unit " +
id_to_fu_names.find(fu_name)->second.first);
347 double clock_budget =
HLS_C->get_clock_period() *
HLS_C->get_clock_period_resource_fraction();
348 auto n_cycles = GetPointer<operation>(node_op)->time_m->get_cycles();
351 const double stage_time = [&]() ->
double {
353 if(GetPointer<functional_unit>(
list_of_FU[fu_name])->component_timing_alias !=
"")
355 std::string component_name = GetPointer<functional_unit>(
list_of_FU[fu_name])->component_timing_alias;
356 std::string library =
HLS_D->get_technology_manager()->get_library(component_name);
358 THROW_ASSERT(f_unit_alias,
"Library miss component: " + component_name);
359 auto* fu_alias = GetPointer<functional_unit>(f_unit_alias);
361 operation* op_alias = op_alias_node ? GetPointer<operation>(op_alias_node) :
362 GetPointer<operation>(fu_alias->get_operations().front());
371 if(stage_time < clock_budget && stage_time > 0)
373 return (n_cycles - 1) * clock_budget + stage_time;
378 if(exec_time > (n_cycles - 1) * clock_budget && exec_time < n_cycles * clock_budget)
384 return n_cycles * clock_budget;
390 if(GetPointer<functional_unit>(
list_of_FU[fu_name])->component_timing_alias !=
"")
393 std::string component_name = GetPointer<functional_unit>(
list_of_FU[fu_name])->component_timing_alias;
394 std::string library =
HLS_D->get_technology_manager()->get_library(component_name);
396 THROW_ASSERT(f_unit_alias,
"Library miss component: " + component_name);
397 auto* fu_alias = GetPointer<functional_unit>(f_unit_alias);
401 op_alias_node = op_alias_node ? op_alias_node : fu_alias->get_operations().front();
412 unsigned int fu_name;
425 const auto node_operation = [&]() -> std::string {
437 node_id_to_fus.find(std::pair<unsigned int, std::string>(node_id, node_operation))->second;
441 auto f_i = fu_set.begin();
443 while(CF && f_i != f_end &&
444 ((*CF)(*f_i) <= 0 || (
binding.find(node_id) !=
binding.end() &&
binding.find(node_id)->second.second != *f_i)))
458 ControlStep temp(0u);
466 GetPointer<operation>(GetPointer<functional_unit>(
list_of_FU[fu_name])->get_operation(op_name))->time_m,
467 "Timing information not specified for operation " + op_name +
" on unit " +
470 GetPointer<operation>(GetPointer<functional_unit>(
list_of_FU[fu_name])->get_operation(op_name))
471 ->time_m->get_initiation_time();
479 for(; f_i != f_end; ++f_i)
481 if(CF && (*CF)(*f_i) <= 0)
485 switch(allocation_min_max)
489 GetPointer<operation>(GetPointer<functional_unit>(
list_of_FU[*f_i])->get_operation(op_name))
491 "Timing information not specified for operation " + op_name +
" on unit " +
493 temp =
std::max(int_value, GetPointer<operation>(
494 GetPointer<functional_unit>(
list_of_FU[*f_i])->get_operation(op_name))
495 ->time_m->get_initiation_time());
499 GetPointer<operation>(GetPointer<functional_unit>(
list_of_FU[*f_i])->get_operation(op_name))
501 "Timing information not specified for operation " + op_name +
" on unit " +
503 temp =
std::min(int_value, GetPointer<operation>(
504 GetPointer<functional_unit>(
list_of_FU[*f_i])->get_operation(op_name))
505 ->time_m->get_initiation_time());
508 temp = ControlStep(0u);
509 THROW_ERROR(std::string(
"Not supported AllocationInformation::op_performed"));
512 if(temp != int_value)
530 THROW_ASSERT(GetPointer<operation>(GetPointer<functional_unit>(
list_of_FU[fu_name])->get_operation(op_name)),
531 op_name +
" not provided by " +
list_of_FU[fu_name]->get_name());
533 GetPointer<operation>(GetPointer<functional_unit>(
list_of_FU[fu_name])->get_operation(op_name))->time_m,
534 "Timing information not specified for operation " + op_name +
" on unit " +
537 fu_name, GetPointer<functional_unit>(
list_of_FU[fu_name])->get_operation(op_name));
544 for(; f_i != f_end; ++f_i)
546 if(CF && (*CF)(*f_i) <= 0)
550 switch(allocation_min_max)
554 GetPointer<operation>(GetPointer<functional_unit>(
list_of_FU[*f_i])->get_operation(op_name))
556 "Timing information not specified for operation " + op_name +
" on unit " +
560 fu_name, GetPointer<functional_unit>(
list_of_FU[*f_i])->get_operation(op_name)));
564 GetPointer<operation>(GetPointer<functional_unit>(
list_of_FU[*f_i])->get_operation(op_name))
566 "Timing information not specified for operation " + op_name +
" on unit " +
570 fu_name, GetPointer<functional_unit>(
list_of_FU[*f_i])->get_operation(op_name)));
574 THROW_ERROR(std::string(
"Not supported AllocationInformation::op_performed"));
577 if(temp != double_value)
588 THROW_ERROR(std::string(
"Not supported AllocationInformation::op_target"));
613 for(
auto f_i = fu_set.begin(); f_i != f_end; ++f_i)
617 min_num_res = min_num_res > num_res ? num_res : min_num_res;
630 std::string memory_ctrl_type = GetPointer<functional_unit>(current_fu)->memory_ctrl_type;
644 double max_value = 0.0;
645 auto no_it_end = node_ops.end();
646 for(
auto no_it = node_ops.begin(); no_it != no_it_end; ++no_it)
678 const auto stmt_kind = stmt->
get_kind();
679 if(stmt_kind == gimple_assign_K)
681 const auto ga = GetPointerS<const gimple_assign>(stmt);
683 if(op1_kind == ssa_name_K || op1_kind == integer_cst_K || op1_kind == convert_expr_K || op1_kind == nop_expr_K ||
684 op1_kind == bit_ior_concat_expr_K || op1_kind == extract_bit_expr_K)
688 else if((op1_kind == rshift_expr_K || op1_kind == lshift_expr_K) &&
693 else if(op1_kind == cond_expr_K || op1_kind == vec_cond_expr_K)
696 "Cond expr not allocated " + ga->op1->ToString());
707 if(op1_kind == widen_mult_expr_K || op1_kind == mult_expr_K)
709 const auto in_prec = op1_kind == mult_expr_K ? fu_prec : (fu_prec / 2);
713 else if(op1_kind == lut_expr_K)
734 const auto new_stmt_temp =
HLS_D->get_technology_manager()->get_fu(fu_name,
LIBRARY_STD_FU);
735 THROW_ASSERT(new_stmt_temp,
"Functional unit '" + fu_name +
"' not found");
736 const auto new_stmt_fu = GetPointerS<const functional_unit>(new_stmt_temp);
737 return new_stmt_fu->area_m->get_area_value();
739 else if(stmt_kind == gimple_multi_way_if_K || stmt_kind == gimple_cond_K || stmt_kind == gimple_return_K)
772 const unsigned int statement_index)
const 776 return ControlStep(0u);
780 "This function (" +
get_string_name(fu_name) +
") cannot implement the operation " + operation_name);
783 return ControlStep(0u);
788 "Timing information not specified for unit " +
id_to_fu_names.find(fu_name)->second.first);
789 return GetPointer<operation>(node_op)->time_m->get_initiation_time();
798 THROW_ASSERT(GetPointer<operation>(op_node),
"Op node is not an operation");
799 return GetPointer<operation>(op_node)->is_bounded();
805 std::string op_string =
810 THROW_ASSERT(GetPointer<operation>(op_node),
"Op node is not an operation: " + op_string);
811 return GetPointer<operation>(op_node)->is_bounded();
815 unsigned int fu_type)
const 821 THROW_ASSERT(GetPointer<operation>(op_node),
"Op node is not an operation");
822 return GetPointer<operation>(op_node)->is_primary_inputs_registered();
828 std::string op_string =
832 THROW_ASSERT(GetPointer<operation>(op_node),
"Op node is not an operation");
833 return GetPointer<operation>(op_node)->is_primary_inputs_registered();
852 const auto ga = GetPointer<const gimple_assign>(tn);
861 right_kind == cond_expr_K || right_kind == vec_cond_expr_K || right_kind == convert_expr_K ||
862 right_kind == nop_expr_K || right_kind == bit_ior_concat_expr_K ||
863 right_kind == extract_bit_expr_K || right_kind == lut_expr_K || right_kind == truth_not_expr_K ||
864 right_kind == bit_not_expr_K || right_kind == negate_expr_K || right_kind == bit_xor_expr_K ||
865 right_kind == bit_ior_expr_K || right_kind == bit_and_expr_K || right_kind == truth_and_expr_K ||
866 right_kind == truth_or_expr_K || right_kind == truth_xor_expr_K || right_kind == lshift_expr_K ||
867 right_kind == rshift_expr_K || right_kind == widen_mult_expr_K || right_kind == mult_expr_K ||
868 right_kind == plus_expr_K || right_kind == minus_expr_K || right_kind == ternary_plus_expr_K ||
869 right_kind == eq_expr_K || right_kind == ne_expr_K || right_kind == lt_expr_K ||
870 right_kind == le_expr_K || right_kind == gt_expr_K || right_kind == ge_expr_K ||
871 right_kind == ternary_mp_expr_K || right_kind == ternary_pm_expr_K ||
872 right_kind == ternary_mm_expr_K,
876 if(GetPointer<const gimple_nop>(tn))
880 if(GetPointer<const gimple_phi>(tn))
884 THROW_ERROR(
"Unexpected operation in AllocationInformation::is_operation_bounded: " + tn->get_kind_text());
891 std::string memory_type = GetPointer<functional_unit>(current_fu)->memory_type;
892 std::string memory_ctrl_type = GetPointer<functional_unit>(current_fu)->memory_ctrl_type;
893 return memory_type ==
"ASYNCHRONOUS" || memory_type ==
"SYNCHRONOUS_SDS" ||
901 std::string memory_type = GetPointer<functional_unit>(current_fu)->memory_type;
902 std::string memory_ctrl_type = GetPointer<functional_unit>(current_fu)->memory_ctrl_type;
912 std::string memory_ctrl_type = GetPointer<functional_unit>(current_fu)->memory_ctrl_type;
968 fu_name =
binding.find(v)->second.second;
976 std::string fu_string_name =
list_of_FU[fu_name]->get_name();
1023 std::string fu_string_name =
list_of_FU[fu_name]->get_name();
1051 "This function (" +
get_string_name(fu_name) +
") cannot implement the operation " +
1060 "Timing information not specified for unit " +
id_to_fu_names.find(fu_name)->second.first);
1063 if(GetPointer<functional_unit>(
list_of_FU[fu_name])->component_timing_alias !=
"")
1065 std::string component_name = GetPointer<functional_unit>(
list_of_FU[fu_name])->component_timing_alias;
1066 std::string library =
HLS_D->get_technology_manager()->get_library(component_name);
1068 THROW_ASSERT(f_unit_alias,
"Library miss component: " + component_name);
1069 auto* fu_alias = GetPointer<functional_unit>(f_unit_alias);
1071 operation* op_alias = op_alias_node ? GetPointer<operation>(op_alias_node) :
1072 GetPointer<operation>(fu_alias->get_operations().front());
1098 return HLS_C->get_clock_period();
1105 STR(fu_prec) +
" not found in mux database of " +
STR(
mux_timing_db.size()) +
" elements");
1112 "fu_prec:" +
STR(fu_prec) +
" mux_ins: " +
STR(mux_ins));
1156 "This function (" +
get_string_name(fu_name) +
") cannot implement the operation " +
1164 THROW_ASSERT(GetPointer<operation>(node_op)->time_m,
"Timing information not specified for operation " +
1165 node_op->get_name() +
" on unit " +
1167 return GetPointer<operation>(node_op)->time_m->get_cycles();
1192 return list_of_FU[fu_name]->get_name() +
"_" +
STR(fu_name);
1253 double max_value = 0.0;
1254 auto no_it_end = node_ops.end();
1255 for(
auto no_it = node_ops.begin(); no_it != no_it_end; ++no_it)
1283 unsigned int tot_num_res = 0;
1286 for(
auto f_i = fu_set.begin(); f_i != f_end; ++f_i)
1289 THROW_ASSERT(num_res != 0,
"something wrong happened");
1296 tot_num_res += num_res;
1306 "no operation can be mapped on the given functional unit");
1307 return static_cast<unsigned int>(
fus_to_node_id.find(fu)->second.size());
1322 unsigned int first_valid_id = 0;
1323 unsigned int index = 0;
1325 if(vars_read.empty())
1332 bool is_a_pointer =
false;
1334 bool is_second_constant =
false;
1336 unsigned long long max_size_in = 0;
1337 unsigned long long min_n_elements = 0;
1338 bool is_cond_expr_bool_test =
false;
1339 for(
auto itr = vars_read.begin(), end = vars_read.end(); itr != end; ++itr, ++
index)
1341 const auto id = std::get<0>(*itr);
1342 if(
id && !first_valid_id)
1344 first_valid_id = id;
1350 is_cond_expr_bool_test =
true;
1353 if((current_op ==
"cond_expr" || current_op ==
"vec_cond_expr") && index != 0 && id)
1355 first_valid_id = id;
1357 if(current_op ==
"cond_expr" || current_op ==
"vec_cond_expr")
1359 is_second_constant =
true;
1363 !is_constrained && !is_second_constant && vars_read.size() != 1 && current_op !=
"mult_expr" &&
1364 current_op !=
"widen_mult_expr" && current_op !=
"insertelement_expr" &&
1365 current_op !=
"extractelement_expr" &&
1366 (index == 1 || current_op !=
"lut_expr" || current_op !=
"extract_bit_expr")))
1368 info->input_prec.push_back(0);
1369 info->real_input_nelem.push_back(0);
1370 info->base128_input_nelem.push_back(0);
1371 is_second_constant =
true;
1381 max_size_in =
std::max(max_size_in, element_size);
1382 if(min_n_elements == 0 || ((128 / element_size) < min_n_elements))
1384 min_n_elements = 128 / element_size;
1400 info->input_prec.push_back(32);
1401 info->real_input_nelem.push_back(0);
1402 info->base128_input_nelem.push_back(0);
1409 const auto size_form_par = form_par_type ?
tree_helper::Size(form_par_type) : 0;
1410 const auto size_value = size_form_par ? size_form_par : size_tree_var;
1411 if(form_par_type && index == 0)
1413 formal_parameter_type = form_par_type;
1420 info->real_input_nelem.push_back(vector_size / element_size);
1421 info->base128_input_nelem.push_back(128 / element_size);
1422 info->input_prec.push_back(element_size);
1424 "---Type is " +
STR(type->
index) +
" " +
STR(type) +
1425 " - Number of input elements (base128): " +
STR(128 / element_size) +
1426 " - Number of real input elements: " +
STR(vector_size / element_size) +
1427 " - Input precision: " +
STR(element_size));
1431 info->real_input_nelem.push_back(0);
1432 info->base128_input_nelem.push_back(0);
1433 info->input_prec.push_back(size_value);
1440 if(formal_parameter_type)
1442 type = formal_parameter_type;
1453 info->node_kind =
"VECTOR_BOOL";
1457 info->node_kind =
"INT";
1461 info->node_kind =
"REAL";
1465 info->node_kind =
"UINT";
1469 info->node_kind =
"VECTOR_BOOL";
1476 info->node_kind =
"VECTOR_INT";
1480 info->node_kind =
"VECTOR_UINT";
1484 info->node_kind =
"VECTOR_REAL";
1492 const auto max_size_in_true =
1493 std::max(max_size_in, *std::max_element(info->input_prec.begin(), info->input_prec.end()));
1494 for(
const auto n_elements : info->base128_input_nelem)
1496 if(n_elements && (min_n_elements == 0 || (n_elements < min_n_elements)))
1498 min_n_elements = n_elements;
1502 if(is_cond_expr_bool_test)
1504 info->is_single_bool_test_cond_expr =
true;
1510 if(current_op ==
"widen_mult_expr" || current_op ==
"mult_expr")
1520 info->real_output_nelem = output_size / element_size;
1521 info->base128_output_nelem = 128 / element_size;
1522 info->output_prec = element_size;
1523 info->input_prec[0] = max_size_in;
1524 info->input_prec[1] = max_size_in;
1528 THROW_ASSERT(info->input_prec.size() == 2,
"unexpected number of inputs");
1530 if(output_size_true < info->input_prec[0])
1532 info->input_prec[0] = output_size_true;
1534 if(output_size_true < info->input_prec[1])
1536 info->input_prec[1] = output_size_true;
1538 if(info->input_prec[0] > info->input_prec[1])
1540 std::swap(info->input_prec[0], info->input_prec[1]);
1542 bool resized =
false;
1544 const auto resized_second_index =
resize_1_8_pow2(info->input_prec[1]);
1546 for(
size_t ind = 0; ind <
DSP_y_db.size() && !resized; ind++)
1548 const auto y_dsp_size =
DSP_y_db[ind];
1550 if(info->input_prec[1] < y_dsp_size && resized_y_dsp_size == resized_second_index)
1552 if(info->input_prec[0] <
DSP_x_db[ind])
1555 info->input_prec[1] = y_dsp_size;
1556 info->input_prec[0] =
DSP_x_db[ind];
1562 max_size_in =
std::max(info->input_prec[0], info->input_prec[1]);
1564 info->input_prec[0] = max_size_in;
1565 info->input_prec[1] = max_size_in;
1566 info->output_prec = max_size_in;
1574 info->output_prec = max_size_in;
1576 if(current_op ==
"widen_mult_expr")
1578 info->output_prec = info->input_prec[0] + info->input_prec[1];
1580 info->real_output_nelem = info->base128_output_nelem = 0;
1584 current_op ==
"dot_prod_expr" || current_op ==
"widen_sum_expr" || current_op ==
"widen_mult_hi_expr" ||
1585 current_op ==
"widen_mult_lo_expr" || current_op ==
"vec_unpack_hi_expr" ||
1586 current_op ==
"vec_unpack_lo_expr")
1589 if(
starts_with(current_op,
"float_expr_") && max_size_in < 32)
1602 info->output_prec = 32;
1611 info->base128_output_nelem = 128 / element_size;
1612 info->real_output_nelem = info->output_prec / element_size;
1613 info->output_prec = element_size;
1615 "---Number of output elements (base128): " +
STR(info->base128_output_nelem) +
1616 " - Number of real output elements: " +
STR(info->real_output_nelem) +
1617 " - Output precision: " +
STR(info->output_prec));
1622 info->real_output_nelem = 0;
1623 info->base128_output_nelem = 0;
1628 if(
starts_with(current_op,
"fix_trunc_expr_") && info->output_prec < 32)
1630 info->output_prec = 32;
1633 if(current_op ==
"dot_prod_expr")
1635 max_size_in = info->output_prec / 2;
1636 min_n_elements = info->base128_output_nelem * 2;
1639 else if(current_op ==
"plus_expr" || current_op ==
"minus_expr" || current_op ==
"pointer_plus_expr" ||
1640 current_op ==
"ternary_plus_expr" || current_op ==
"ternary_pm_expr" || current_op ==
"ternary_mp_expr" ||
1641 current_op ==
"ternary_mm_expr" || current_op ==
"negate_expr" || current_op ==
"bit_and_expr" ||
1642 current_op ==
"bit_ior_expr" || current_op ==
"bit_xor_expr" || current_op ==
"bit_not_expr" ||
1643 current_op ==
"bit_ior_concat_expr" || current_op ==
"cond_expr" ||
1644 current_op ==
"vec_cond_expr" 1657 info->real_output_nelem = output_size / element_size;
1658 info->base128_output_nelem = 128 / element_size;
1659 info->output_prec = element_size;
1663 if(current_op ==
"plus_expr" || current_op ==
"minus_expr" || current_op ==
"pointer_plus_expr" ||
1664 current_op ==
"ternary_plus_expr" || current_op ==
"ternary_pm_expr" || current_op ==
"ternary_mp_expr" ||
1665 current_op ==
"ternary_mm_expr" || current_op ==
"negate_expr")
1667 if(out_prec == 9 || out_prec == 17 || out_prec == 33)
1670 max_size_in = out_prec;
1673 else if(current_op ==
"bit_and_expr" || current_op ==
"bit_ior_expr" || current_op ==
"bit_xor_expr" ||
1674 current_op ==
"bit_not_expr" || current_op ==
"bit_ior_concat_expr")
1677 out_prec =
std::min(out_prec, 64ull);
1680 info->real_output_nelem = 0;
1681 info->base128_output_nelem = 0;
1683 if(current_op ==
"cond_expr" && max_size_in > 64 && info->node_kind ==
"VECTOR_BOOL")
1688 if(info->output_prec >= max_size_in)
1690 info->output_prec = max_size_in;
1691 info->base128_output_nelem = min_n_elements;
1692 info->real_output_nelem = min_n_elements;
1696 max_size_in = info->output_prec;
1697 min_n_elements = info->base128_output_nelem;
1701 else if(current_op ==
"lshift_expr")
1712 info->real_output_nelem = info->output_prec / element_size;
1713 info->base128_output_nelem = 128 / element_size;
1714 info->output_prec = element_size;
1716 "---Type is " +
STR(type->
index) +
" " +
STR(type) +
1717 " - Number of output elements (base128): " +
STR(info->base128_output_nelem) +
1718 " - Number of real output elements: " +
STR(info->real_output_nelem) +
1719 " - Output precision: " +
STR(info->output_prec));
1723 if(is_second_constant && info->output_prec > 64)
1725 info->output_prec = 64;
1728 info->real_output_nelem = 0;
1729 info->base128_output_nelem = 0;
1732 if(info->output_prec >= max_size_in)
1734 max_size_in = info->output_prec;
1735 min_n_elements = info->base128_output_nelem;
1740 info->output_prec = max_size_in;
1741 info->base128_output_nelem = min_n_elements;
1742 info->real_output_nelem = min_n_elements;
1745 else if(current_op ==
"rshift_expr")
1747 if(max_size_in > 64)
1749 if(!is_second_constant)
1752 "A bad estimation of the timing of the rshift_expr operator will happen. This may occur when a " 1753 "non-constant bit reference of a long ac_type is used. Unrolling such a part may fix the issue.");
1757 info->output_prec = max_size_in;
1758 info->base128_output_nelem = min_n_elements;
1759 info->real_output_nelem = min_n_elements;
1763 info->output_prec = max_size_in;
1764 info->base128_output_nelem = min_n_elements;
1765 info->real_output_nelem = min_n_elements;
1767 size_t n_inputs = info->input_prec.size();
1768 if(current_op !=
"widen_mult_expr" && current_op !=
"mult_expr")
1770 for(
unsigned int i = 0; i < n_inputs; ++i)
1772 if(info->input_prec[i] != 0)
1774 info->input_prec[i] = max_size_in;
1778 for(
auto& n_elements : info->base128_input_nelem)
1782 n_elements = min_n_elements;
1787 if(current_op ==
"vec_perm_expr")
1789 if(info->input_prec[2] == 0)
1791 std::swap(info->input_prec[2], info->input_prec[1]);
1792 std::swap(info->base128_input_nelem[2], info->base128_input_nelem[1]);
1793 std::swap(info->real_input_nelem[2], info->real_input_nelem[1]);
1810 tech[name] =
static_cast<unsigned int>(
static_cast<int>(
tech[name]) + delta);
1821 THROW_ASSERT(fu_name < get_number_fu_types(),
"functional unit id not meaningful");
1822 THROW_ASSERT(precision_map.find(fu_name) != precision_map.end(),
"missing the precision of " +
STR(fu_name));
1823 return precision_map.find(fu_name)->second != 0 ? precision_map.find(fu_name)->second : 32;
1828 return estimate_muxNto1_delay(fu_prec, 2);
1837 STR(fu_prec) +
"_" +
STR(fu_prec));
1838 auto* fu_br = GetPointer<functional_unit>(f_unit_mux);
1840 auto* op_mux = GetPointer<operation>(op_mux_node);
1841 double mux_delay = time_m_execution_time(op_mux) - get_setup_hold_time();
1842 if(mux_delay <= 0.0)
1844 mux_delay = get_setup_hold_time() / 2;
1854 auto fu_end = list_of_FU.end();
1855 unsigned int index = 0;
1856 for(
auto fu = list_of_FU.begin(); fu != fu_end; ++fu)
1862 if(!node_id_to_fus.empty())
1864 os <<
"Op_name relation with functional unit name and operations.\n";
1865 for(
const auto& node_id : node_id_to_fus)
1867 for(
const auto fu : node_id.second)
1869 os <<
" [" <<
STR(node_id.first.first) <<
", <" << list_of_FU[fu]->get_name() <<
">]" << std::endl;
1881 for(
const auto& bind : binding)
1889 " Corresponding operation: " +
1891 GetPointer<const gimple_node>(TreeM->CGetTreeNode(bind.first))->
operation) +
1892 "(" +
STR(bind.second.second) +
")");
1893 auto* fu =
dynamic_cast<functional_unit*
>(GetPointer<functional_unit>(list_of_FU[bind.second.second]));
1898 for(
const auto& bind : node_id_to_fus)
1900 if(bind.first.first ==
ENTRY_ID || bind.first.first ==
EXIT_ID || bind.first.first)
1905 " Vertex " +
STR(bind.first.first) +
"(" +
1906 GetPointer<const gimple_node>(TreeM->CGetTreeNode(bind.first.first))->
operation +
")");
1908 for(
const auto fu_id : bind.second)
1910 auto* fu =
dynamic_cast<functional_unit*
>(GetPointer<functional_unit>(list_of_FU[fu_id]));
1912 " FU name: " + fu->get_name() +
"(" +
STR(fu_id) +
")");
1923 if(library_name ==
"")
1932 return GetCycleLatency(op_graph->CGetOpNodeInfo(operationID)->GetNodeId());
1938 "-->Get cycle latency of " + ((operationID !=
ENTRY_ID && operationID !=
EXIT_ID) ?
1939 STR(TreeM->CGetTreeNode(operationID)) :
1941 if(CanImplementSetNotEmpty(operationID))
1943 const auto actual_latency = get_cycles(GetFuType(operationID), operationID);
1944 const auto ret_value = actual_latency != 0 ? actual_latency : 1;
1950 const auto tn = TreeM->CGetTreeNode(operationID);
1951 const auto ga = GetPointer<const gimple_assign>(tn);
1955 if(right_kind == widen_mult_expr_K || right_kind == mult_expr_K)
1958 "<--Latency of not allocated fu is 1: possibly inaccurate");
1961 const auto in_prec = right_kind == mult_expr_K ? fu_prec : (fu_prec / 2);
1962 const auto fu_name =
1964 const auto new_stmt_temp = HLS_D->get_technology_manager()->get_fu(fu_name,
LIBRARY_STD_FU);
1965 THROW_ASSERT(new_stmt_temp,
"Functional unit '" + fu_name +
"' not found");
1966 const auto new_stmt_fu = GetPointer<const functional_unit>(new_stmt_temp);
1968 const auto new_stmt_op = GetPointer<operation>(new_stmt_op_temp);
1969 return new_stmt_op->time_m->get_cycles();
1971 else if(right_kind == call_expr_K)
1975 else if(right_kind == ssa_name_K || right_kind == integer_cst_K || right_kind == cond_expr_K ||
1976 right_kind == vec_cond_expr_K || right_kind == nop_expr_K || right_kind == addr_expr_K ||
1977 right_kind == convert_expr_K || right_kind == lut_expr_K || right_kind == extract_bit_expr_K ||
1978 right_kind == bit_ior_concat_expr_K || right_kind == truth_not_expr_K || right_kind == bit_not_expr_K ||
1979 right_kind == negate_expr_K || right_kind == truth_and_expr_K || right_kind == truth_or_expr_K ||
1980 right_kind == truth_xor_expr_K || right_kind == bit_and_expr_K || right_kind == bit_ior_expr_K ||
1981 right_kind == bit_xor_expr_K || right_kind == rshift_expr_K || right_kind == lshift_expr_K ||
1982 right_kind == plus_expr_K || right_kind == pointer_plus_expr_K || right_kind == minus_expr_K ||
1983 right_kind == eq_expr_K || right_kind == ne_expr_K || right_kind == lt_expr_K ||
1984 right_kind == le_expr_K || right_kind == gt_expr_K || right_kind == ge_expr_K ||
1985 right_kind == ternary_plus_expr_K || right_kind == ternary_mp_expr_K || right_kind == ternary_pm_expr_K ||
1986 right_kind == ternary_mm_expr_K)
1994 else if(tn->get_kind() == gimple_multi_way_if_K || tn->get_kind() == gimple_cond_K ||
1995 tn->get_kind() == gimple_phi_K || tn->get_kind() == gimple_nop_K || tn->get_kind() == gimple_return_K)
2006 const unsigned int stage)
const 2008 return GetTimeLatency(op_graph->CGetOpNodeInfo(operationID)->GetNodeId(), functional_unit, stage);
// Fragment of the index-based latency query. Every visible return is a pair of doubles;
// where both are named, the first is the operation execution time and the second is the
// stage period (see the make_pair calls below).
// NOTE(review): the embedded listing numbers skip values, so braces, guard conditions and
// some statements of this function are not visible in this view.
2012 const unsigned int functional_unit_type,
2013 const unsigned int stage)
const 2017 return std::pair<double, double>(0.0, 0.0);
// Select the index used for all timing look-ups; every visible return of this lambda
// yields operation_index unchanged.
2021 const unsigned int time_operation_index = [&]() ->
unsigned int {
2024 return operation_index;
2026 if(CanImplementSetNotEmpty(operation_index))
2028 return operation_index;
2030 return operation_index;
2033 const auto num_cycles = GetCycleLatency(time_operation_index);
// For 0 < stage < num_cycles - 1 both returned values are the clock period scaled by the
// resource fraction.
2034 if(stage > 0 && stage < num_cycles - 1)
2036 const double ret_value = HLS_C->get_clock_period_resource_fraction() * HLS_C->get_clock_period();
2038 return std::pair<double, double>(ret_value, ret_value);
// Section taken when CanImplementSetNotEmpty() holds for the operation: timing comes from
// the functional unit, either the one passed in or the one returned by GetFuType (the
// condition choosing between the two is not visible here).
2041 if(CanImplementSetNotEmpty(time_operation_index))
2043 unsigned int fu_type;
2046 fu_type = functional_unit_type;
2050 fu_type = GetFuType(time_operation_index);
2053 "---Functional unit name is " + get_fu_name(fu_type).first);
// Added to both results further down; no visible line assigns it a non-zero value.
2054 double connection_contribute = 0;
2056 double actual_execution_time = get_execution_time(fu_type, time_operation_index);
// n_ins is computed from the statement when its right-hand side is a lut_expr; the rest of
// the lambda is not visible here.
2058 auto n_ins = [&]() ->
unsigned {
2060 auto tn = TreeM->CGetTreeNode(time_operation_index);
2061 const auto ga = GetPointer<const gimple_assign>(tn);
2062 if(ga &&
GET_NODE(ga->op1)->get_kind() == lut_expr_K)
2064 auto le = GetPointer<lut_expr>(
GET_NODE(ga->op1));
// Execution time of the unit minus the correction returned by get_correction_time.
2104 double initial_execution_time =
2105 actual_execution_time -
2106 get_correction_time(
2107 fu_type, GetPointer<const gimple_node>(TreeM->CGetTreeNode(time_operation_index))->
operation, n_ins);
2109 "---Initial corrected execution time " +
STR(initial_execution_time));
2110 double op_execution_time = initial_execution_time;
2111 if(op_execution_time <= 0.0)
2117 double actual_stage_period;
2118 actual_stage_period = get_stage_period(fu_type, time_operation_index);
2120 double initial_stage_period = 0.0;
// With a non-zero initiation time the stage period is capped at
// resource fraction * clock period and then reduced by the same correction.
2121 if(get_initiation_time(fu_type, time_operation_index) > 0)
2123 if(actual_stage_period > HLS_C->get_clock_period_resource_fraction() * HLS_C->get_clock_period())
2125 actual_stage_period = HLS_C->get_clock_period_resource_fraction() * HLS_C->get_clock_period();
2127 initial_stage_period =
2128 actual_stage_period -
2129 get_correction_time(
2130 fu_type, GetPointer<const gimple_node>(TreeM->CGetTreeNode(time_operation_index))->
operation, n_ins);
2132 double stage_period = initial_stage_period;
// Invariant: a unit with a non-zero initiation time must end up with a positive stage period.
2134 THROW_ASSERT(get_initiation_time(fu_type, time_operation_index) == 0 || stage_period > 0.0,
2135 "unexpected condition: " + get_fu_name(fu_type).first +
" Initiation time " +
2136 STR(get_initiation_time(fu_type, time_operation_index)) +
" Stage period " +
STR(stage_period));
2138 if(stage_period > 0)
2140 stage_period += connection_contribute;
2144 op_execution_time += connection_contribute;
2147 "<--Time is " +
STR(op_execution_time) +
"," +
STR(stage_period));
2148 return std::make_pair(op_execution_time, stage_period);
// Estimate driven by the kind of the statement itself.
// NOTE(review): how this section is reached (presumably when no unit can implement the
// operation) is not visible here - confirm.
2154 const auto op_stmt = TreeM->CGetTreeNode(time_operation_index);
2155 const auto op_stmt_kind = op_stmt->get_kind();
2156 if(op_stmt_kind == gimple_assign_K)
2158 const auto ga = GetPointerS<const gimple_assign>(op_stmt);
// Right-hand sides of these kinds are given a (0.0, 0.0) latency.
2160 if(op1_kind == ssa_name_K || op1_kind == integer_cst_K || op1_kind == convert_expr_K ||
2161 op1_kind == nop_expr_K || op1_kind == addr_expr_K || op1_kind == bit_ior_concat_expr_K ||
2162 op1_kind == extract_bit_expr_K)
2165 return std::make_pair(0.0, 0.0);
// Shifts are also given zero latency when a further condition (not visible here) holds.
2167 else if((op1_kind == rshift_expr_K || op1_kind == lshift_expr_K) &&
2172 return std::make_pair(0.0, 0.0);
// cond_expr / vec_cond_expr: execution time is mux_time_unit(fu_prec), stage period 0.
2174 else if(op1_kind == cond_expr_K || op1_kind == vec_cond_expr_K)
2177 "Cond expr not allocated " + ga->op1->ToString());
2181 const auto op_execution_time = mux_time_unit(fu_prec);
2183 "<--Time is mux time (precision is " +
STR(fu_prec) +
") " +
STR(op_execution_time) +
2185 return std::make_pair(op_execution_time, 0.0);
// Multiplications: timing is read from a unit named fu_name looked up in LIBRARY_STD_FU;
// in_prec is fu_prec for mult_expr and fu_prec / 2 otherwise.
2190 std::string fu_name;
2191 if(op1_kind == widen_mult_expr_K || op1_kind == mult_expr_K)
2193 const auto in_prec = op1_kind == mult_expr_K ? fu_prec : (fu_prec / 2);
2196 const auto new_stmt_temp = HLS_D->get_technology_manager()->get_fu(fu_name,
LIBRARY_STD_FU);
2197 THROW_ASSERT(new_stmt_temp,
"Functional unit '" + fu_name +
"' not found");
2198 const auto new_stmt_fu = GetPointerS<const functional_unit>(new_stmt_temp);
2200 const auto new_stmt_op = GetPointerS<operation>(new_stmt_op_temp);
2201 auto op_execution_time = time_m_execution_time(new_stmt_op);
2203 "---Uncorrected execution time is " +
STR(op_execution_time));
// The setup/hold time is subtracted from the characterized execution time.
2204 op_execution_time = op_execution_time - get_setup_hold_time();
2205 double actual_stage_period;
2206 actual_stage_period = time_m_stage_period(new_stmt_op);
2208 "---actual_stage_period=" +
STR(actual_stage_period));
2209 double initial_stage_period = 0.0;
// Same capping as above: the stage period is limited to resource fraction * clock period
// before the setup/hold time is removed.
2210 if(new_stmt_op->time_m->get_initiation_time() > 0)
2212 if(actual_stage_period > HLS_C->get_clock_period_resource_fraction() * HLS_C->get_clock_period())
2214 actual_stage_period = HLS_C->get_clock_period_resource_fraction() * HLS_C->get_clock_period();
2216 initial_stage_period = actual_stage_period - get_setup_hold_time();
2218 double stage_period = initial_stage_period;
2220 "<--Time is " +
STR(op_execution_time) +
"," +
STR(stage_period));
2221 return std::make_pair(op_execution_time, stage_period);
// lut_expr: execution time of a library unit minus setup/hold, stage period 0.
2223 else if(op1_kind == lut_expr_K)
2244 const auto new_stmt_temp = HLS_D->get_technology_manager()->get_fu(fu_name,
LIBRARY_STD_FU);
2245 THROW_ASSERT(new_stmt_temp,
"Functional unit '" + fu_name +
"' not found");
2246 const auto new_stmt_fu = GetPointerS<const functional_unit>(new_stmt_temp);
2248 const auto new_stmt_op = GetPointerS<operation>(new_stmt_op_temp);
2249 auto op_execution_time = time_m_execution_time(new_stmt_op);
2251 "---Uncorrected execution time is " +
STR(op_execution_time));
2252 op_execution_time = op_execution_time - get_setup_hold_time();
2254 return std::make_pair(op_execution_time, 0.0);
// Multi-way ifs and conds take the value of estimate_controller_delay_fb().
2256 else if(op_stmt_kind == gimple_multi_way_if_K || op_stmt_kind == gimple_cond_K)
2258 auto controller_delay = estimate_controller_delay_fb();
2260 return std::make_pair(controller_delay, 0.0);
// Phi, nop and return statements are given zero latency.
2262 else if(op_stmt_kind == gimple_phi_K || op_stmt_kind == gimple_nop_K || op_stmt_kind == gimple_return_K)
2265 return std::make_pair(0.0, 0.0);
2268 return std::make_pair(0.0, 0.0);
// Fragment that computes the "phi connection delay" of statement_index (wording taken from
// the debug message below): it derives an input count from the phis that use the value
// defined by the statement and asks estimate_muxNto1_delay for the matching mux delay.
// NOTE(review): the signature and several statements are not visible in this listing.
2275 "-->Computing phi connection delay of " +
STR(statement_index));
// phi_in_degree: largest number of inputs found over the phis visited below.
2277 const auto phi_in_degree = [&]() ->
size_t {
2278 size_t ret_value = 0;
2283 const auto tn = TreeM->CGetTreeNode(statement_index);
2284 if(tn->get_kind() != gimple_assign_K)
2288 const auto ga = GetPointer<const gimple_assign>(tn);
2289 if(
GET_NODE(ga->op0)->get_kind() != ssa_name_K)
2293 const auto sn = GetPointer<const ssa_name>(
GET_NODE(ga->op0));
// Visit every statement using the defined ssa name and keep only the phis.
2295 for(
const auto& use : sn->CGetUseStmts())
2298 if(
target->get_kind() == gimple_phi_K)
2301 const auto gp = GetPointer<const gimple_phi>(
target);
// Only definitions with a non-zero index that are not constants are collected in phi_inputs.
2303 for(
const auto& def_edge : gp->CGetDefEdgesList())
2305 if(def_edge.first->index && !behavioral_helper->is_a_constant(def_edge.first->index))
2307 phi_inputs.insert(def_edge.first->index);
2310 auto curr_in_degree =
static_cast<size_t>(phi_inputs.size());
// NOTE(review): what happens when more than 4 inputs are found is not visible here.
2311 if(curr_in_degree > 4)
2315 ret_value =
std::max(ret_value, curr_in_degree);
2320 if(phi_in_degree == 0)
2325 const auto statement = TreeM->CGetTreeNode(statement_index);
2326 THROW_ASSERT(statement->get_kind() == gimple_assign_K, statement->ToString());
2327 const auto sn = GetPointerS<const gimple_assign>(statement)->op0;
// The delay is the one of an N-to-1 mux with phi_in_degree inputs at the given precision.
2330 const auto mux_time = estimate_muxNto1_delay(precision, static_cast<unsigned int>(phi_in_degree));
2332 "<--Delay (" +
STR(phi_in_degree) +
" with " +
STR(precision) +
" bits) is " +
STR(mux_time));
// Fragment: fetches the tree node of operation_index, views it as a gimple_phi and returns
// mux_time_unit(fu_prec).
// NOTE(review): the signature and the computation of fu_prec are not visible in this listing.
2338 const auto tn = TreeM->CGetTreeReindex(operation_index);
2339 const auto gp = GetPointer<const gimple_phi>(
GET_CONST_NODE(tn));
2346 return mux_time_unit(fu_prec);
// Wrapper body: maps the graph node `operation` to its node id and delegates to the
// index-based GetFuType. NOTE(review): the signature is not visible in this listing.
2351 return GetFuType(op_graph->CGetOpNodeInfo(operation)->GetNodeId());
// Fragment of the index-based functional unit look-up: is_vertex_bounded_with is asked
// first; when it reports false a set of candidate units (fu_set) is consulted and, on the
// visible return, its first element is used.
// NOTE(review): the construction of fu_set and the body of the loop are not visible here.
2356 unsigned int fu_type = 0;
2357 if(not is_vertex_bounded_with(operation, fu_type))
2360 if(fu_set.size() > 1)
2362 for(
const auto fu : fu_set)
2370 return *(fu_set.begin());
// Fragment computing the area of a multiplexer unit whose name is built from fu_prec: the
// SLICE_LUTS resource value is read first; a second assignment replaces it with
// get_area_value() - 1.0.
// NOTE(review): the condition guarding the second assignment, the unit look-up and the
// signature are not visible in this listing.
2382 STR(fu_prec) +
"_" +
STR(fu_prec));
2383 auto* fu_mux = GetPointer<functional_unit>(f_unit_mux);
2384 auto area = fu_mux->area_m->get_resource_value(area_info::SLICE_LUTS);
2387 area = fu_mux->area_m->get_area_value() - 1.0;
// Area estimate for a mux associated with unit fu_name: the area of a 2-input N-to-1 mux at
// the precision returned by get_prec(fu_name).
// NOTE(review): the signature is not visible in this listing.
2394 auto fu_prec = get_prec(fu_name);
2396 return estimate_muxNto1_area(fu_prec, 2);
// Returns half of the value computed by EstimateControllerDelay().
// NOTE(review): the signature is not visible in this listing.
2401 return 0.5 * EstimateControllerDelay();
// Fragment of the controller delay estimate (see the "Controller delay is" debug message):
// a characterized execution time is scaled by controller_delay_multiplier and by a factor
// depending on the number of states, then floored at twice the setup/hold time.
// NOTE(review): the signature and several operands are not visible in this listing.
// Normalization constant for the number of states; it can be overridden through the
// "StatesNumberNormalization" parameter (the default value is not visible here).
2406 const double states_number_normalization =
parameters->IsParameter(
"StatesNumberNormalization") ?
2407 parameters->GetParameter<
double>(
"StatesNumberNormalization") :
2409 if(not
parameters->getOption<
bool>(OPT_estimate_logic_and_connections))
// Number of basic block graph vertices plus the number of complex operations.
2414 boost::num_vertices(*hls_manager->CGetFunctionBehavior(function_index)->CGetBBGraph(
FunctionBehavior::BB)) +
2415 get_n_complex_operations();
2424 n_states_factor =
static_cast<double>(n_states) / states_number_normalization;
// The reference unit is looked up at a fixed precision of 16.
2426 unsigned int fu_prec = 16;
2432 "_" +
STR(fu_prec) +
"_" +
STR(fu_prec) +
"_0");
2433 auto* fu = GetPointer<functional_unit>(f_unit);
2435 auto* op = GetPointer<operation>(op_node);
2436 double delay = time_m_execution_time(op);
2437 delay = delay * controller_delay_multiplier *
2438 ((1 -
exp(-n_states_factor)) +
// Lower bound: the delay is never below 2 * setup/hold time.
2440 if(delay < 2 * get_setup_hold_time())
2442 delay = 2 * get_setup_hold_time();
2445 "---Controller delay is " +
STR(delay) +
" while n_states is " +
STR(n_states));
// Returns one of three string suffixes: "", "_3" or "_4".
// NOTE(review): the conditions selecting each suffix and the signature are not visible in
// this listing.
2453 return std::string(
"");
2457 return std::string(
"_3");
2461 return std::string(
"_4");
// Fragment of the correction-time computation (see the "Computing correction time of"
// debug message). res_value starts at the setup/hold time and is then adjusted according to
// the kind of functional unit and to a set of flags collected along the way.
// NOTE(review): the embedded listing numbers skip values, so the signature, braces and
// several guard conditions are not visible in this view.
// ARRAY_CORRECTION is defined as 0 here, so code guarded by `#if ARRAY_CORRECTION` below is
// compiled out (the matching #endif lines are not visible in this listing).
2470 #define ARRAY_CORRECTION 0 2472 unsigned int n_ins)
const 2474 double res_value = get_setup_hold_time();
2476 std::string memory_type = GetPointer<functional_unit>(current_fu)->memory_type;
2477 std::string memory_ctrl_type = GetPointer<functional_unit>(current_fu)->memory_ctrl_type;
2479 "-->Computing correction time of '" + operation_name +
"'" +
2480 (memory_type !=
"" ?
"(" + memory_type +
")" :
"") +
2481 (memory_ctrl_type !=
"" ?
"(" + memory_ctrl_type +
")" :
""));
// Flags and sizes filled in by the unit-specific sections and consumed at the end.
2483 unsigned long long elmt_bitsize = 0;
2484 bool is_read_only_correction =
false;
2485 bool is_proxied_correction =
false;
2486 bool is_a_proxy =
false;
2487 bool is_private_correction =
false;
2488 bool is_single_variable =
false;
2489 auto single_var_lambda = [&](
unsigned var) ->
bool {
// Units with a timing alias: the difference between the unit's own time and the alias
// component's time is added. For each of them the stage period is used when the initiation
// time is non-zero, the execution time otherwise.
2503 if(GetPointer<functional_unit>(current_fu)->component_timing_alias !=
"")
2506 std::string component_name = GetPointer<functional_unit>(current_fu)->component_timing_alias;
2507 std::string library = HLS_D->get_technology_manager()->get_library(component_name);
2508 technology_nodeRef f_unit_alias = HLS_D->get_technology_manager()->get_fu(component_name, library);
2509 THROW_ASSERT(f_unit_alias,
"Library miss component: " + component_name);
2510 functional_unit * fu_alias = GetPointer<functional_unit>(f_unit_alias);
// Falls back to the first operation of the alias unit when op_alias_node is null.
2512 operation * op_alias = op_alias_node ? GetPointer<operation>(op_alias_node) : GetPointer<operation>(fu_alias->
get_operations().front());
2513 double alias_exec_time = op_alias->
time_m->
get_initiation_time() != 0u ? time_m_stage_period(op_alias) : time_m_execution_time(op_alias);
2516 operation * op_cur = GetPointer<operation>(op_cur_node);
2517 double cur_exec_time = op_cur->
time_m->
get_initiation_time() != 0u ? time_m_stage_period(op_cur) : time_m_execution_time(op_cur);
2518 res_value += cur_exec_time - alias_exec_time;
// MEMORY_TYPE_SYNCHRONOUS_UNALIGNED section (per the debug message): sets the
// proxied/private/read-only/single-variable flags from Rmem and takes the BRAM bitsize.
2524 "---Applying memory correction for MEMORY_TYPE_SYNCHRONOUS_UNALIGNED");
2525 unsigned var = get_memory_var(fu);
2526 if(!Rmem->is_a_proxied_variable(var))
2528 is_proxied_correction =
true;
2530 else if(Rmem->is_private_memory(var))
2532 is_private_correction =
true;
2534 if(Rmem->is_read_only_variable(var))
2536 is_read_only_correction =
true;
2538 is_single_variable = single_var_lambda(var);
2540 elmt_bitsize = Rmem->get_bram_bitsize();
2542 #if ARRAY_CORRECTION 2546 std::vector<unsigned int> dims;
2548 unsigned int n_not_power_of_two = 0;
2549 for(
auto idx : dims)
2551 ++n_not_power_of_two;
2552 if(dims.size() > 1 && n_not_power_of_two > 0)
2557 TM->
get_fu(
ADDER_STD + std::string(
"_" +
STR(bus_addr_bitsize) +
"_" +
STR(bus_addr_bitsize) +
"_" +
2558 STR(bus_addr_bitsize)),
2561 std::string(
"_" +
STR(bus_addr_bitsize) +
"_" +
STR(bus_addr_bitsize) +
"_" +
2562 STR(bus_addr_bitsize)));
2565 operation* op = GetPointer<operation>(op_node);
2566 double delay = time_m_execution_time(op) - get_setup_hold_time();
2567 unsigned int n_levels = 0;
2568 for(; dims.size() >= (1u << n_levels); ++n_levels)
2570 res_value -= (n_levels - 1) * delay;
// MEMORY_TYPE_ASYNCHRONOUS section (per the debug message).
2578 "---Applying memory correction for MEMORY_TYPE_ASYNCHRONOUS");
2579 unsigned var = get_memory_var(fu);
2580 if(!Rmem->is_a_proxied_variable(var))
2582 is_proxied_correction =
true;
2584 if(Rmem->is_read_only_variable(var))
2586 is_read_only_correction =
true;
2588 is_single_variable = single_var_lambda(var);
2592 #if ARRAY_CORRECTION 2596 unsigned int n_not_power_of_two = 0;
2597 for(
auto idx : dims)
2599 ++n_not_power_of_two;
2600 if((dims.size() > 1 && n_not_power_of_two > 0))
2605 TM->
get_fu(
ADDER_STD + std::string(
"_" +
STR(bus_addr_bitsize) +
"_" +
STR(bus_addr_bitsize) +
"_" +
2606 STR(bus_addr_bitsize)),
2609 std::string(
"_" +
STR(bus_addr_bitsize) +
"_" +
STR(bus_addr_bitsize) +
"_" +
2610 STR(bus_addr_bitsize)));
2613 operation* op = GetPointer<operation>(op_node);
2614 double delay = time_m_execution_time(op) - get_setup_hold_time();
2615 unsigned int n_levels = 0;
2616 for(; dims.size() >= (1u << n_levels); ++n_levels)
2618 res_value -= (n_levels - 1) * delay;
// MEMORY_TYPE_SYNCHRONOUS_SDS / MEMORY_TYPE_SYNCHRONOUS_SDS_BUS section (per the debug
// message): only the single-variable flag is set in the visible lines.
2627 "---Applying memory correction for MEMORY_TYPE_SYNCHRONOUS_SDS and MEMORY_TYPE_SYNCHRONOUS_SDS_BUS");
2628 unsigned var = get_memory_var(fu);
2629 is_single_variable = single_var_lambda(var);
2633 #if ARRAY_CORRECTION 2637 unsigned int n_not_power_of_two = 0;
2638 for(
auto idx : dims)
2640 ++n_not_power_of_two;
2641 if((dims.size() > 1 && n_not_power_of_two > 0))
2646 TM->
get_fu(
ADDER_STD + std::string(
"_" +
STR(bus_addr_bitsize) +
"_" +
STR(bus_addr_bitsize) +
"_" +
2647 STR(bus_addr_bitsize)),
2650 std::string(
"_" +
STR(bus_addr_bitsize) +
"_" +
STR(bus_addr_bitsize) +
"_" +
2651 STR(bus_addr_bitsize)));
2654 operation* op = GetPointer<operation>(op_node);
2655 double delay = time_m_execution_time(op) - get_setup_hold_time();
2656 unsigned int n_levels = 0;
2657 for(; dims.size() >= (1u << n_levels); ++n_levels)
2659 res_value -= (n_levels - 1) * delay;
// Proxy section (per the debug message): the variable is read from proxy_memory_units and
// the delta between the current unit's time and the time of a reference unit (op_sds) is
// added to res_value.
2669 unsigned var = proxy_memory_units.find(fu)->second;
2671 "---Applying memory correction for PROXY for var:" +
STR(var));
2672 if(Rmem->is_read_only_variable(var))
2674 is_read_only_correction =
true;
2676 is_single_variable = single_var_lambda(var);
2678 auto* fu_cur = GetPointerS<functional_unit>(current_fu);
2680 std::string latency_postfix =
2683 get_latency_string(fu_cur->bram_load_latency);
2685 auto* op_cur = GetPointerS<operation>(op_cur_node);
2686 double cur_exec_time =
2687 op_cur->time_m->get_initiation_time() != 0u ? time_m_stage_period(op_cur) : time_m_execution_time(op_cur);
2688 double cur_exec_delta;
// NOTE(review): the bodies of the following branches are not visible in this listing.
2690 if(Rmem->is_sds_var(var))
2695 if(Rmem->is_private_memory(var))
2715 if(Rmem->is_private_memory(var))
2740 if(Rmem->is_private_memory(var))
2742 is_private_correction =
true;
2744 elmt_bitsize = Rmem->get_bram_bitsize();
2747 auto* fu_sds = GetPointer<functional_unit>(f_unit_sds);
2749 auto* op_sds = GetPointer<operation>(op_sds_node);
2750 double cur_sds_exec_time =
2751 op_sds->time_m->get_initiation_time() != 0u ? time_m_stage_period(op_sds) : time_m_execution_time(op_sds);
2752 cur_exec_delta = cur_exec_time - cur_sds_exec_time;
2753 res_value = res_value + cur_exec_delta;
2755 #if ARRAY_CORRECTION 2760 unsigned int n_not_power_of_two = 0;
2761 for(
auto idx : dims)
2763 ++n_not_power_of_two;
2764 if(dims.size() > 1 && n_not_power_of_two > 0)
2769 TM->
get_fu(
ADDER_STD + std::string(
"_" +
STR(bus_addr_bitsize) +
"_" +
STR(bus_addr_bitsize) +
"_" +
2770 STR(bus_addr_bitsize)),
2774 operation* op = GetPointer<operation>(op_node);
2775 double delay = time_m_execution_time(op) - get_setup_hold_time();
2776 unsigned int n_levels = 0;
2777 for(; dims.size() >= (1u << n_levels); ++n_levels)
2779 res_value -= (n_levels - 1) * delay;
2786 elmt_bitsize = Rmem->get_bram_bitsize();
// Single-bool-test cond_expr units: the correction is the difference between two unit
// delays, each taken as execution time minus setup/hold time.
2788 else if(is_single_bool_test_cond_expr_units(fu))
2790 auto prec = get_prec(fu);
2795 auto true_delay = [&]() ->
double {
2797 auto* fu_ce = GetPointer<functional_unit>(f_unit_ce);
2799 auto* op_ce = GetPointer<operation>(op_ce_node);
2800 double setup_time = get_setup_hold_time();
2801 return time_m_execution_time(op_ce) - setup_time;
2804 auto* fu_ce = GetPointer<functional_unit>(f_unit_ce);
2806 auto* op_ce = GetPointer<operation>(op_ce_node);
2807 double setup_time = get_setup_hold_time();
2808 double ce_delay = time_m_execution_time(op_ce) - setup_time;
2809 double correction = ce_delay - true_delay;
2810 res_value = res_value + correction;
// Simple pointer_plus units: the whole delay of the looked-up unit (minus setup/hold) is added.
2813 else if(is_simple_pointer_plus_expr(fu))
2817 auto* fu_ce = GetPointer<functional_unit>(f_unit_ce);
2819 auto* op_ce = GetPointer<operation>(op_ce_node);
2820 double setup_time = get_setup_hold_time();
2821 double ce_delay = time_m_execution_time(op_ce) - setup_time;
2822 double correction = ce_delay;
2823 res_value = res_value + correction;
// lut_expr: applies only when the device defines a non-zero "max_lut_size". The LUT delay is
// added, then n_ins * (lut_delay / max_lut_size) is subtracted.
// NOTE(review): the handling of n_ins > max_lut_size is not visible in this listing.
2825 else if(operation_name ==
"lut_expr")
2828 if(HLS_D->has_parameter(
"max_lut_size") && HLS_D->get_parameter<
size_t>(
"max_lut_size") != 0)
2832 auto* fu_lut = GetPointer<functional_unit>(f_unit_lut);
2834 auto* op_lut = GetPointer<operation>(op_lut_node);
2835 double setup_time = get_setup_hold_time();
2836 double lut_delay = time_m_execution_time(op_lut) - setup_time;
2837 res_value = res_value + lut_delay;
2838 auto max_lut_size = HLS_D->get_parameter<
size_t>(
"max_lut_size");
2839 if(n_ins > max_lut_size)
2845 auto delta_delay = (lut_delay * 1.) /
static_cast<double>(max_lut_size);
2847 res_value = res_value -
static_cast<double>(n_ins) * delta_delay;
2853 "---Correction value after first correction " +
STR(res_value));
// Bus-width adjustment selected by elmt_bitsize: -1.0 for 128, -0.5 for 64, 0 for 16 and 8
// (the value used for 32 is not visible here); it scales setup/hold time / time_multiplier.
2854 double bus_multiplier = 0;
2855 if(elmt_bitsize == 128)
2857 bus_multiplier = -1.0;
2859 else if(elmt_bitsize == 64)
2861 bus_multiplier = -0.5;
2863 else if(elmt_bitsize == 32)
2867 else if(elmt_bitsize == 16)
2869 bus_multiplier = +0;
2871 else if(elmt_bitsize == 8)
2873 bus_multiplier = +0;
2875 res_value = res_value + bus_multiplier * (get_setup_hold_time() / time_multiplier);
// Flag-driven adjustments: read-only adds half a (scaled) setup/hold time; proxied and
// private each use a scaled estimate_mux_time(fu), all weighted by memory_correction_coefficient.
2876 if(is_read_only_correction)
2879 res_value = res_value + memory_correction_coefficient * 0.5 * (get_setup_hold_time() / time_multiplier);
2881 if(is_proxied_correction)
2885 res_value + memory_correction_coefficient * (estimate_mux_time(fu) / (mux_time_multiplier * time_multiplier));
2887 if(is_private_correction)
2891 res_value + memory_correction_coefficient * (estimate_mux_time(fu) / (mux_time_multiplier * time_multiplier));
// Single-variable case: for proxies/proxied variables res_value is overwritten with the
// unit time minus setup/hold; in the other visible assignment the unit time minus twice the
// setup/hold time is added instead.
2893 if(is_single_variable)
2897 auto fname = get_fu_name(fu).first;
2899 auto* fu_sv = GetPointer<functional_unit>(f_unit_sv);
2901 auto* op_sv = GetPointer<operation>(op_sv_node);
2902 double setup_time = get_setup_hold_time();
2903 double cur_sv_exec_time =
2904 op_sv->time_m->get_initiation_time() != 0u ? time_m_stage_period(op_sv) : time_m_execution_time(op_sv);
2905 if(is_a_proxy || is_proxied_correction)
2907 res_value = cur_sv_exec_time - setup_time;
2911 double sv_delay = cur_sv_exec_time - 2 * setup_time;
2912 double correction = sv_delay;
2913 res_value = res_value + correction;
// Fragment estimating a call delay bounded by the clock budget
// (resource fraction * clock period).
// NOTE(review): the signature, the initial computation of call_delay and the conditions
// guarding several assignments are not visible in this listing.
2923 double clock_budget = HLS_C->get_clock_period_resource_fraction() * HLS_C->get_clock_period();
// Margin reserved for muxes: the OPT_scheduling_mux_margins option times the time of a
// 32-bit mux.
2924 double scheduling_mux_margins =
parameters->getOption<
double>(OPT_scheduling_mux_margins) * mux_time_unit(32);
2926 parameters->isOption(OPT_disable_function_proxy) &&
parameters->getOption<
bool>(OPT_disable_function_proxy);
2930 call_delay = clock_budget;
2934 call_delay = clock_budget;
2937 "---Minimum slack " +
// Lower bounds: a negative delay is replaced by the setup/hold time, and the result is
// never smaller than the controller delay.
2942 if(call_delay < 0.0)
2944 call_delay = get_setup_hold_time();
2946 auto ctrl_delay = EstimateControllerDelay();
2947 if(call_delay < ctrl_delay)
2949 call_delay = ctrl_delay;
2952 std::string function_name = behavioral_helper->get_function_name();
2954 auto* fu = GetPointer<functional_unit>(HLS_D->get_technology_manager()->get_fu(module_name,
WORK_LIBRARY));
2955 auto* op = GetPointer<operation>(fu->get_operation(function_name));
2960 call_delay += EstimateControllerDelay();
// Upper bound: the delay is capped at the clock budget minus the mux margin.
2962 if(call_delay >= clock_budget - scheduling_mux_margins)
2964 call_delay = clock_budget - scheduling_mux_margins;
// Fragment returning the area of unit fu_s1 expressed in units of its estimated mux area,
// plus 3 for every DSP reported by get_DSPs(fu_s1).
// NOTE(review): the signature and part of one condition are not visible in this listing.
2972 double mux_area = estimate_mux_area(fu_s1);
// Single-bool-test cond_expr units use mux_area - 1 (floored at 0) instead of get_area().
2973 double resource_area =
2974 is_single_bool_test_cond_expr_units(fu_s1) ? (mux_area > 1 ? (mux_area - 1) : 0) : get_area(fu_s1);
// An area exceeding the mux area by less than 4 is rounded down to the mux area.
2975 if(resource_area > mux_area && resource_area - mux_area < 4)
2977 resource_area = mux_area;
2979 const auto fu_name = list_of_FU[fu_s1]->get_name();
// Units whose name contains "max_expr_FU_" or "min_expr_FU_" can be given zero area; the
// first part of this condition is not visible here.
2981 (fu_name.find(
"max_expr_FU_") != std::string::npos || fu_name.find(
"min_expr_FU_") != std::string::npos))
2983 resource_area = 0.0;
2985 return (resource_area / mux_area) + 3 * get_DSPs(fu_s1);
// Returns the stored n_complex_operations value.
// NOTE(review): the signature is not visible in this listing.
2990 return n_complex_operations;
// Fragment selecting a unit for a float-to-float conversion: only 32->64 and 64->32 bit
// conversions are handled; any other pair raises the THROW_ERROR below. The selected name is
// resolved through get_fu(unit_name, hls_manager).
// NOTE(review): the signature and the assignments to unit_name are not visible in this listing.
2997 std::string unit_name;
2998 if(prec_in == 32 && prec_out == 64)
3002 else if(prec_in == 64 && prec_out == 32)
3008 THROW_ERROR(
"not supported float to float conversion: " +
STR(prec_in) +
" " +
STR(prec_out));
3010 current_fu = get_fu(unit_name, hls_manager);
// Predicate on unit fu_name: on the visible return it is true when the unit's
// characterizing_constant_value is a non-empty string.
// NOTE(review): the result for units failing has_to_be_synthetized() and the signature are
// not visible in this listing.
3016 if(!has_to_be_synthetized(fu_name))
3020 return GetPointer<functional_unit>(list_of_FU[fu_name])->characterizing_constant_value !=
"";
// True when fu_name has an entry in proxy_memory_units; the id must be below
// get_number_fu_types(). NOTE(review): the signature is not visible in this listing.
3025 THROW_ASSERT(fu_name < get_number_fu_types(),
"functional unit id not meaningful");
3026 return proxy_memory_units.find(fu_name) != proxy_memory_units.end();
// True when fu_name is a memory unit, or a proxy memory unit, whose associated variable is
// reported as read-only by Rmem; the id must be below get_number_fu_types().
// NOTE(review): the signature is not visible in this listing.
3031 THROW_ASSERT(fu_name < get_number_fu_types(),
"functional unit id not meaningful");
3032 return (is_memory_unit(fu_name) && Rmem->is_read_only_variable(get_memory_var(fu_name))) ||
3033 (is_proxy_memory_unit(fu_name) && Rmem->is_read_only_variable(get_proxy_memory_var(fu_name)));
// True when fu_name belongs to single_bool_test_cond_expr_units; the id must be below
// get_number_fu_types(). NOTE(review): the signature is not visible in this listing.
3038 THROW_ASSERT(fu_name < get_number_fu_types(),
"functional unit id not meaningful");
3039 return single_bool_test_cond_expr_units.find(fu_name) != single_bool_test_cond_expr_units.end();
// True when fu_name belongs to simple_pointer_plus_expr; the id must be below
// get_number_fu_types(). NOTE(review): the signature is not visible in this listing.
3044 THROW_ASSERT(fu_name < get_number_fu_types(),
"functional unit id not meaningful");
3045 return simple_pointer_plus_expr.find(fu_name) != simple_pointer_plus_expr.end();
// Fragment computing the maximum of time_m->get_cycles() over the operations in node_ops.
// NOTE(review): the result for units failing has_to_be_synthetized(), the origin of
// node_ops, the final return and the signature are not visible in this listing.
3050 if(!has_to_be_synthetized(fu_name))
3055 unsigned int max_value = 0;
3056 auto no_it_end = node_ops.end();
3057 for(
auto no_it = node_ops.begin(); no_it != no_it_end; ++no_it)
3059 max_value =
std::max(max_value, GetPointer<operation>(*no_it)->time_m->get_cycles());
// Returns clock_period - (clock_period * resource_fraction - mux margin - setup/hold time),
// where the mux margin is the OPT_scheduling_mux_margins option times the time of a 32-bit mux.
// NOTE(review): the signature is not visible in this listing.
3066 auto clock_period = HLS_C->get_clock_period();
3067 auto clock_period_resource_fraction = HLS_C->get_clock_period_resource_fraction();
3068 auto scheduling_mux_margins =
parameters->getOption<
double>(OPT_scheduling_mux_margins) * mux_time_unit(32);
3069 auto setup_hold_time = get_setup_hold_time();
3071 return clock_period - ((clock_period * clock_period_resource_fraction) - scheduling_mux_margins - setup_hold_time);
// Fragment computing the "roots" of an ssa name (wording from the debug messages): starting
// from `ssa`, definitions are followed backwards through a fixed set of expression kinds and
// the ssa names where the walk stops are collected in `roots`. Results are cached per ssa
// together with the (bb_version, cs) pair they were computed for.
// NOTE(review): the signature, braces and several statements are not visible in this listing.
3076 const auto bb_version = hls_manager->CGetFunctionBehavior(function_index)->GetBBVersion();
// Cache hit: same basic block version and same control step as the stored entry.
3077 if(ssa_bb_versions.find(ssa) != ssa_bb_versions.end() &&
3078 ssa_bb_versions.find(ssa)->second == std::pair<unsigned int, AbsControlStep>(bb_version, cs))
3081 "---Compute roots - Using cached values of " +
STR(ssa) +
" at version " +
STR(bb_version));
3082 return ssa_roots.find(ssa)->second;
3086 const auto schedule =
hls->
Rsch;
3090 ssa_to_be_analyzeds.insert(ssa);
3092 "-->Computing roots of " +
STR(ssa) +
" at version " +
STR(bb_version));
// Worklist loop: take the first pending ssa, skip it if it was already analyzed.
3093 while(ssa_to_be_analyzeds.size())
3095 const auto current_tn_index = *(ssa_to_be_analyzeds.begin());
3096 ssa_to_be_analyzeds.erase(ssa_to_be_analyzeds.begin());
3097 if(already_analyzed_ssas.find(current_tn_index) != already_analyzed_ssas.end())
3101 already_analyzed_ssas.insert(current_tn_index);
3103 "-->Considering " +
STR(TreeM->CGetTreeNode(current_tn_index)));
3104 const auto current_sn = GetPointer<const ssa_name>(TreeM->CGetTreeNode(current_tn_index));
3110 const auto current_sn_def = current_sn->CGetDefStmt();
// NOTE(review): the actions taken for definitions scheduled in a different control step or
// located in a different basic block are not visible in this listing.
3112 if(schedule->is_scheduled(current_sn_def->index) && schedule->get_cstep(current_sn_def->index) != cs && cs.second !=
AbsControlStep::UNKNOWN)
3119 cs.first != GetPointer<const gimple_node>(
GET_NODE(current_sn_def))->bb_index)
3124 const auto current_def_ga = GetPointer<const gimple_assign>(
GET_NODE(current_sn_def));
// A definition that is not a gimple_assign makes the current ssa a root.
3125 if(not current_def_ga)
3128 roots.insert(current_tn_index);
3131 const auto be = GetPointer<const binary_expr>(
GET_NODE(current_def_ga->op1));
// Shifts and bit-ands: a non-constant second operand makes the ssa a root; with a constant
// second operand the walk continues through the first operand.
3133 (be->get_kind() == rshift_expr_K || be->get_kind() == lshift_expr_K || be->get_kind() == bit_and_expr_K))
3135 if(
GET_NODE(be->op1)->get_kind() != integer_cst_K)
3138 "<--Adding as root " + current_sn->ToString() +
3139 " which is defined in a shift by variable or in an and with a variable");
3140 roots.insert(current_tn_index);
3146 "<--Defined in a shift by constant or in an and with a constant");
3147 if(already_analyzed_ssas.find(be->op0->index) == already_analyzed_ssas.end())
3149 ssa_to_be_analyzeds.insert(be->op0->index);
// Comparisons and truth and/or/xor: both operands are queued.
3154 if(be && (be->get_kind() == gt_expr_K || be->get_kind() == ge_expr_K || be->get_kind() == lt_expr_K ||
3155 be->get_kind() == le_expr_K || be->get_kind() == eq_expr_K || be->get_kind() == ne_expr_K ||
3156 be->get_kind() == truth_and_expr_K || be->get_kind() == truth_or_expr_K ||
3157 be->get_kind() == truth_xor_expr_K))
3160 if(already_analyzed_ssas.find(be->op0->index) == already_analyzed_ssas.end())
3162 ssa_to_be_analyzeds.insert(be->op0->index);
3164 if(already_analyzed_ssas.find(be->op1->index) == already_analyzed_ssas.end())
3166 ssa_to_be_analyzeds.insert(be->op1->index);
// truth_not and nop: the single operand is queued.
3170 const auto ue = GetPointer<const unary_expr>(
GET_NODE(current_def_ga->op1));
3171 if(ue && (ue->get_kind() == truth_not_expr_K || ue->get_kind() == nop_expr_K))
3174 if(already_analyzed_ssas.find(ue->op->index) == already_analyzed_ssas.end())
3176 ssa_to_be_analyzeds.insert(ue->op->index);
// cond_expr: all three operands are queued.
3180 const auto ce = GetPointer<const cond_expr>(
GET_NODE(current_def_ga->op1));
3184 if(already_analyzed_ssas.find(ce->op0->index) == already_analyzed_ssas.end())
3186 ssa_to_be_analyzeds.insert(ce->op0->index);
3188 if(already_analyzed_ssas.find(ce->op1->index) == already_analyzed_ssas.end())
3190 ssa_to_be_analyzeds.insert(ce->op1->index);
3192 if(already_analyzed_ssas.find(ce->op2->index) == already_analyzed_ssas.end())
3194 ssa_to_be_analyzeds.insert(ce->op2->index);
3199 roots.insert(current_tn_index);
// Store the result in the cache together with the version/control step it refers to.
3201 ssa_bb_versions[ssa] = std::pair<unsigned int, AbsControlStep>(bb_version, cs);
3202 ssa_roots[ssa] = roots;
3204 "<--Computed roots of " +
STR(ssa) +
" at version " +
STR(bb_version) +
": " +
STR(roots.size()) +
// Fragment collecting the gimple_assign statements with a cond_expr right-hand side whose
// condition is driven by `ssa`: uses are followed forward through a fixed set of expression
// kinds and the indices found are returned as cond_expr_ga_indices. Results are cached per
// ssa together with the basic block version.
// NOTE(review): the signature, braces and several statements are not visible in this listing.
3212 const auto bb_version = hls_manager->CGetFunctionBehavior(function_index)->GetBBVersion();
// Cache hit: the stored entry was computed for the current basic block version.
3213 if(cond_expr_bb_versions.find(ssa) != cond_expr_bb_versions.end() &&
3214 cond_expr_bb_versions.find(ssa)->second == bb_version)
3217 "---Computing cond_exprs starting from " +
STR(TreeM->CGetTreeNode(ssa)) +
3218 " - Using cached values");
3219 return ssa_cond_exprs.find(ssa)->second;
3224 "-->Computing cond_exprs starting from " +
STR(TreeM->CGetTreeNode(ssa)));
3228 ssa_to_be_analyzeds.insert(ssa);
// Worklist loop over the ssa names still to be examined.
3229 while(ssa_to_be_analyzeds.size())
3231 const auto current_tn_index = *(ssa_to_be_analyzeds.begin());
3232 ssa_to_be_analyzeds.erase(ssa_to_be_analyzeds.begin());
3234 "-->Considering " +
STR(TreeM->CGetTreeNode(current_tn_index)));
3235 const auto current_sn = GetPointer<const ssa_name>(TreeM->CGetTreeNode(current_tn_index));
// Only uses that are gimple_assign statements are inspected further.
3241 for(
const auto& use_stmt : current_sn->CGetUseStmts())
3244 if(
GET_NODE(use_stmt.first)->get_kind() != gimple_assign_K)
3249 const auto current_use_ga = GetPointer<const gimple_assign>(
GET_NODE(use_stmt.first));
3250 const auto be = GetPointer<const binary_expr>(
GET_NODE(current_use_ga->op1));
// Shifts and bit-ands with a constant second operand: the value defined by the use (op0 of
// the assignment) is queued; with a non-constant second operand only a debug message is visible.
3252 (be->get_kind() == rshift_expr_K || be->get_kind() == lshift_expr_K || be->get_kind() == bit_and_expr_K))
3254 if(
GET_NODE(be->op1)->get_kind() != integer_cst_K)
3257 "<--Used in a shift by a variable or in an and with a variable");
3263 "<--Used in a shift by constant or in an and with a constant");
3264 if(already_analyzed_ssas.find(current_use_ga->op0->index) == already_analyzed_ssas.end())
3266 ssa_to_be_analyzeds.insert(current_use_ga->op0->index);
// Comparisons and truth and/or/xor: the defined value is queued.
3271 if(be && (be->get_kind() == gt_expr_K || be->get_kind() == ge_expr_K || be->get_kind() == lt_expr_K ||
3272 be->get_kind() == le_expr_K || be->get_kind() == eq_expr_K || be->get_kind() == ne_expr_K ||
3273 be->get_kind() == truth_and_expr_K || be->get_kind() == truth_or_expr_K ||
3274 be->get_kind() == truth_xor_expr_K))
3277 if(already_analyzed_ssas.find(current_use_ga->op0->index) == already_analyzed_ssas.end())
3279 ssa_to_be_analyzeds.insert(current_use_ga->op0->index);
// truth_not and nop: the defined value is queued as well.
3283 const auto ue = GetPointer<const unary_expr>(
GET_NODE(current_use_ga->op1));
3284 if(ue && ue->get_kind() == truth_not_expr_K)
3287 if(already_analyzed_ssas.find(current_use_ga->op0->index) == already_analyzed_ssas.end())
3289 ssa_to_be_analyzeds.insert(current_use_ga->op0->index);
3293 if(ue && ue->get_kind() == nop_expr_K)
3296 if(already_analyzed_ssas.find(current_use_ga->op0->index) == already_analyzed_ssas.end())
3298 ssa_to_be_analyzeds.insert(current_use_ga->op0->index);
// cond_expr: the visible debug message distinguishes the case where the current ssa is an
// operand of the cond_expr but not its condition (op0); the assignment index is recorded in
// cond_expr_ga_indices (the exact branch structure is not visible in this listing).
3302 if(
GET_NODE(current_use_ga->op1)->get_kind() == cond_expr_K)
3304 const auto ce = GetPointer<const cond_expr>(
GET_NODE(current_use_ga->op1));
3305 if(ce->op0->index != current_tn_index)
3308 "<--Used as operand of a cond_expr, but not as condition");
3311 cond_expr_ga_indices.insert(current_use_ga->index);
3322 "<--Considered " +
STR(TreeM->CGetTreeNode(current_tn_index)));
// Store the result in the cache before returning it.
3324 cond_expr_bb_versions[ssa] = bb_version;
3325 ssa_cond_exprs[ssa] = cond_expr_ga_indices;
3327 "<--Computed cond_exprs starting from " +
STR(TreeM->CGetTreeNode(ssa)));
3328 return cond_expr_ga_indices;
// Wrapper body: converts both graph nodes to node ids and delegates to the index-based
// GetConnectionTime; a null second_operation is passed on as index 0.
// NOTE(review): the signature is not visible in this listing.
3335 const auto first_operation_index = op_graph->CGetOpNodeInfo(first_operation)->GetNodeId();
3336 const auto second_operation_index = second_operation ? op_graph->CGetOpNodeInfo(second_operation)->GetNodeId() : 0;
3337 return GetConnectionTime(first_operation_index, second_operation_index, cs);
3343 if(!
parameters->getOption<
bool>(OPT_estimate_logic_and_connections))
3347 if(second_operation == 0)
3354 double end_delay = 0.0;
3355 const auto first_operation_tn = TreeM->CGetTreeNode(first_operation);
3356 if(GetPointer<const gimple_multi_way_if>(first_operation_tn) ||
3357 GetPointer<const gimple_switch>(first_operation_tn) || GetPointer<const gimple_cond>(first_operation_tn))
3359 end_delay = estimate_controller_delay_fb();
3363 double phi_delay = GetPhiConnectionLatency(first_operation);
3364 if(phi_delay > end_delay)
3366 end_delay = phi_delay;
3368 double to_dsp_register_delay = GetToDspRegisterDelay(first_operation);
3369 if(to_dsp_register_delay > end_delay)
3371 end_delay = to_dsp_register_delay;
3375 "<--Got end delay of " +
STR(first_operation) +
": " +
STR(end_delay));
3378 if(is_operation_PI_registered(second_operation))
3381 "-->Computing overall connection time " +
STR(first_operation) +
"-->" +
STR(second_operation) +
3382 " Second operation has registered inputs");
3383 const auto second_operation_tn = TreeM->CGetTreeNode(second_operation);
3384 const auto second_operation_name = GetPointer<const gimple_node>(second_operation_tn)->
operation;
3385 const auto called_function = TreeM->GetFunction(second_operation_name);
3386 THROW_ASSERT(called_function,
STR(second_operation_tn) +
" has registered inputs but it is not a call");
3387 const auto called_hls = hls_manager->get_HLS(called_function->index);
3388 const auto called_sites_number = called_hls->call_sites_number;
3390 double mux_delay = 0.0;
3391 unsigned int n_levels = 0;
3392 for(; called_sites_number > (1u << n_levels); ++n_levels)
3396 mux_delay = (n_levels * mux_time_unit(32));
3398 "<--Got connection time " +
STR(first_operation) +
"-->" +
STR(second_operation) +
": " +
3403 second_operation !=
EXIT_ID &&
3404 (behavioral_helper->IsLut(first_operation) || behavioral_helper->IsLut(second_operation)))
3410 const auto second_operation_tn = TreeM->CGetTreeNode(second_operation);
3413 double connection_time = 0.0;
3415 "-->Computing overall connection time " +
STR(first_operation) +
"-->" +
STR(second_operation));
3418 "-->Computing connection time due to fanout " +
STR(first_operation) +
"-->" +
3419 STR(second_operation));
3422 const auto used_ssa_sn = GetPointer<const ssa_name>(
GET_NODE(used_ssa.first));
3423 if(used_ssa_sn && used_ssa_sn->CGetDefStmt()->index == first_operation)
3426 const auto roots = ComputeRoots(used_ssa_sn->index, cs);
3427 if(roots.find(used_ssa_sn->index) != roots.end())
3430 const auto this_cond_expr = ComputeDrivenCondExpr(used_ssa_sn->index);
3431 cond_expr_ga_indices.insert(this_cond_expr.begin(), this_cond_expr.end());
3435 for(
const auto cond_expr_ga_index : cond_expr_ga_indices)
3437 const auto current_ga = GetPointer<const gimple_assign>(TreeM->CGetTreeNode(cond_expr_ga_index));
3438 const auto cond_def_sn = GetPointer<const ssa_name>(
GET_NODE(current_ga->op0));
3439 const auto local_fo =
tree_helper::Size(current_ga->op0) * cond_def_sn->CGetNumberUses();
3441 "---Incrementing fan out of " +
STR(local_fo) +
" because of " +
3442 STR(TreeM->CGetTreeNode(cond_expr_ga_index)));
3446 "<--Estimated FO of " +
STR(first_operation) +
"-->" +
STR(second_operation) +
" = " +
3450 double fo_correction = fanout_coefficient * get_setup_hold_time() *
static_cast<double>(n_fo);
3451 if(fo_correction < connection_offset)
3453 fo_correction = connection_offset;
3455 else if(fo_correction > 1.1 * (connection_offset + get_setup_hold_time()))
3457 fo_correction = 1.1 * (connection_offset + get_setup_hold_time());
3460 "---Computed connection time due to fanout " +
STR(first_operation) +
"-->" +
3461 STR(second_operation) +
": " +
STR(fo_correction));
3462 connection_time += fo_correction;
3467 "---Computed connection time due to fanout" +
STR(first_operation) +
"-->" +
3468 STR(second_operation) +
": 0.0");
3475 "---Input data (coming from other operation) is " +
STR(used_ssa.first));
3479 const bool is_load_store =
3480 behavioral_helper->IsLoad(second_operation) || behavioral_helper->IsStore(second_operation);
3484 "-->Computing connection time for load and store" +
STR(first_operation) +
"-->" +
3485 STR(second_operation));
3486 const auto fu_type = GetFuType(second_operation);
3487 bool is_array = is_direct_access_memory_unit(fu_type);
3489 is_array ? (is_memory_unit(fu_type) ? get_memory_var(fu_type) : get_proxy_memory_var(fu_type)) : 0;
3490 auto nchannels = get_number_channels(fu_type);
3491 if(var && hls_manager->Rmem->get_maximum_references(var) > (2 * nchannels))
3495 THROW_ERROR(
"nchannels should be different than zero");
3497 const auto ret = estimate_muxNto1_delay(
3499 static_cast<unsigned int>(hls_manager->Rmem->get_maximum_references(var)) / (2 * nchannels));
3500 connection_time += ret;
3503 "<--Computed connection time for load and store " +
STR(first_operation) +
"-->" +
3504 STR(second_operation) +
": 0.0");
3508 const auto first_operation_tn = TreeM->CGetTreeNode(first_operation);
3509 const bool is_first_load = behavioral_helper->IsLoad(first_operation);
3513 "-->Computing connection time of load " +
STR(first_operation) +
"-->" +
3514 STR(second_operation));
3515 const auto fu_type = GetFuType(first_operation);
3516 bool is_array = is_direct_access_memory_unit(fu_type);
3518 is_array ? (is_memory_unit(fu_type) ? get_memory_var(fu_type) : get_proxy_memory_var(fu_type)) : 0;
3519 auto nchannels = get_number_channels(fu_type);
3520 if(var && hls_manager->Rmem->get_maximum_loads(var) > (nchannels))
3524 THROW_ERROR(
"nchannels should be different than zero");
3526 auto ret = estimate_muxNto1_delay(get_prec(fu_type),
3527 static_cast<unsigned int>(hls_manager->Rmem->get_maximum_loads(var)) /
3529 if(ret > (2.5 * get_setup_hold_time()))
3531 ret = 2.5 * get_setup_hold_time();
3534 "<--Computed connection time of load " +
STR(first_operation) +
"-->" +
3535 STR(second_operation) +
": " +
STR(ret) +
" var=" +
STR(var));
3536 connection_time += ret;
3541 "<--Computed connection time of load " +
STR(first_operation) +
"-->" +
3542 STR(second_operation) +
": 0.0");
3545 else if(GetPointer<const gimple_node>(first_operation_tn)->
operation !=
"STORE")
3547 if(CanImplementSetNotEmpty(first_operation))
3549 const auto fu_type = GetFuType(first_operation);
3550 const auto n_resources = get_number_fu(fu_type);
3553 auto ret = estimate_muxNto1_delay(get_prec(fu_type), max_number_of_operations(fu_type) / n_resources);
3556 connection_time += ret;
3562 if(first_operation !=
ENTRY_ID && TreeM->CGetTreeNode(first_operation)->get_kind() == gimple_assign_K)
3564 const auto first_operation_tn = TreeM->CGetTreeNode(first_operation);
3565 const auto ga = GetPointer<const gimple_assign>(first_operation_tn);
3566 const auto ne = GetPointer<const nop_expr>(
GET_CONST_NODE(ga->op1));
3572 double fo_correction = 0.0;
3578 const auto output_sn =
3579 GetPointer<const ssa_name>(
GET_CONST_NODE(GetPointer<const gimple_assign>(first_operation_tn)->op0));
3580 const auto input_sn = GetPointer<const ssa_name>(
GET_CONST_NODE(ne->op));
3584 fo_correction = fanout_coefficient * get_setup_hold_time() *
static_cast<double>(fanout);
3585 if(fo_correction < connection_offset)
3587 fo_correction = connection_offset;
3589 else if(fo_correction > 1.1 * (connection_offset + get_setup_hold_time()))
3591 fo_correction = 1.1 * (connection_offset + get_setup_hold_time());
3593 connection_time += fo_correction;
3597 "<--Computed connection time due to conversion " +
STR(first_operation) +
"-->" +
3598 STR(second_operation) +
"(fanout " +
STR(fanout) +
") : " +
STR(fo_correction));
3601 if(CanImplementSetNotEmpty(first_operation) && get_DSPs(GetFuType(first_operation)) != 0.0)
3603 connection_time += output_DSP_connection_time;
3605 "---Connection time due to DSP connection " +
STR(output_DSP_connection_time));
3607 if(first_operation !=
ENTRY_ID && TreeM->CGetTreeNode(first_operation)->get_kind() == gimple_assign_K)
3609 const auto first_operation_tn = TreeM->CGetTreeNode(first_operation);
3610 const auto op1_kind =
GET_CONST_NODE(GetPointer<const gimple_assign>(first_operation_tn)->op1)->get_kind();
3611 if(op1_kind == plus_expr_K || op1_kind == minus_expr_K || op1_kind == ternary_plus_expr_K ||
3612 op1_kind == ternary_pm_expr_K || op1_kind == ternary_mp_expr_K || op1_kind == ternary_mm_expr_K ||
3613 op1_kind == eq_expr_K || op1_kind == ne_expr_K || op1_kind == gt_expr_K || op1_kind == ge_expr_K ||
3614 op1_kind == lt_expr_K || op1_kind == le_expr_K || op1_kind == pointer_plus_expr_K)
3616 const bool adding_connection = [&]() ->
bool {
3618 if(second_delay.first >
epsilon)
3622 const auto first_bb_index =
3623 GetPointer<const gimple_assign>(TreeM->CGetTreeNode(first_operation))->bb_index;
3624 const auto zero_distance_operations = GetZeroDistanceOperations(second_operation);
3625 for(
const auto zero_distance_operation : zero_distance_operations)
3627 if(GetPointer<const gimple_node>(TreeM->CGetTreeNode(zero_distance_operation))->bb_index ==
3639 if(adding_connection)
3641 connection_time += output_carry_connection_time;
3643 "---Connection time due to carry connection " +
STR(output_carry_connection_time));
3648 if(CanImplementSetNotEmpty(first_operation) && is_indirect_access_memory_unit(GetFuType(first_operation)))
3651 const auto bus_delay = get_setup_hold_time() * 2.5;
3653 connection_time += bus_delay;
3657 const bool is_second_ternary_plus =
3658 GetPointer<const gimple_node>(second_operation_tn)->
operation ==
"ternary_plus_expr" ||
3659 GetPointer<const gimple_node>(second_operation_tn)->
operation ==
"ternary_pm_expr" ||
3660 GetPointer<const gimple_node>(second_operation_tn)->
operation ==
"ternary_mp_expr" ||
3661 GetPointer<const gimple_node>(second_operation_tn)->
operation ==
"ternary_mm_expr";
3662 if(is_second_ternary_plus)
3665 auto ret = get_setup_hold_time()/3;
3669 const bool is_second_plus_minus = GetPointer<const gimple_node>(second_operation_tn)->
operation ==
"plus_expr" || GetPointer<const gimple_node>(second_operation_tn)->
operation ==
"minus_expr";
3670 if(is_second_plus_minus)
3672 const auto fu_type = GetFuType(second_operation);
3673 if(get_prec(fu_type) > 32)
3676 auto ret = get_setup_hold_time()/3;
3682 if(!CanBeMerged(first_operation, second_operation))
3684 connection_time =
std::max(connection_time, connection_offset);
3688 "<--Computed overall connection time " +
STR(first_operation) +
"-->" +
STR(second_operation) +
3689 ": " +
STR(connection_time));
3690 return connection_time;
3695 bool is_read_only_variable,
unsigned channel_number)
3698 auto* vd = GetPointer<const var_decl>(var_node);
3700 const auto hls_d = hls_manager->get_HLS_device();
3701 if(is_read_only_variable)
3703 threshold = 32 * threshold;
3705 else if(hls_d->has_parameter(
"max_distram_nn_size") && channel_number > 1)
3707 threshold = hls_d->get_parameter<
unsigned int>(
"max_distram_nn_size");
3712 if(GetPointer<const array_type>(
GET_CONST_NODE(array_type_node)))
3714 std::vector<unsigned long long> dims;
3715 unsigned long long elts_size;
3717 unsigned long long meaningful_bits = 0;
3718 if(vd->bit_values.size() != 0)
3720 for(
auto bit_el : vd->bit_values)
3730 meaningful_bits = elts_size;
3736 if(meaningful_bits != elts_size)
3738 auto real_bitsize = (var_bitsize / elts_size) * meaningful_bits;
3739 return (real_bitsize <= threshold) || (((var_bitsize / elts_size) <= 64) && channel_number == 1);
3743 return (var_bitsize <= threshold) || (((var_bitsize / elts_size) <= 64) && channel_number == 1);
3748 return var_bitsize <= threshold;
3753 return var_bitsize <= threshold;
3766 else if(GetPointer<const gimple_node>(TreeM->CGetTreeNode(
operation))->
operation == LOAD)
3770 else if(CanImplementSetNotEmpty(
operation))
3772 for(
const auto candidate_functional_unit : can_implement_set(
operation))
3774 if(get_DSPs(candidate_functional_unit))
3786 return ControlStep(static_cast<unsigned int>(ceil(et / clock_period)));
3796 "-->Checking if " +
STR(TreeM->CGetTreeNode(first_operation)) +
" can be fused in " +
3797 STR(TreeM->CGetTreeNode(second_operation)));
3800 if( (second_delay.first <=
epsilon &&
3801 second_delay.second <=
epsilon))
3806 const auto ga0 = GetPointer<const gimple_assign>(TreeM->CGetTreeNode(first_operation));
3807 const auto ga1 = GetPointer<const gimple_assign>(TreeM->CGetTreeNode(second_operation));
3809 if(ga0 &&
tree_helper::Size(ga0->op0) == 1 && ga1 &&
tree_helper::Size(ga1->op1) == 1 && (!CanImplementSetNotEmpty(second_operation) || get_DSPs(GetFuType(second_operation)) == 0.0) && ga0->operation !=
"plus_expr" && ga0->operation !=
"minus_expr" && ga0->operation !=
"ternary_plus_expr" && ga0->operation !=
"ternary_mm_expr" && ga0->operation !=
"ternary_mp_expr" && ga0->operation !=
"ternary_pm_expr")
3813 (!CanImplementSetNotEmpty(second_operation) || get_DSPs(GetFuType(second_operation)) == 0.0))
3824 const auto first_statement_index = op_graph->CGetOpNodeInfo(first_statement)->GetNodeId();
3825 const auto second_statement_index = op_graph->CGetOpNodeInfo(second_statement)->GetNodeId();
3826 const auto ret = CanBeChained(first_statement_index, second_statement_index);
3831 const unsigned int second_statement_index)
const 3833 if(first_statement_index ==
ENTRY_ID || first_statement_index ==
EXIT_ID || second_statement_index ==
ENTRY_ID ||
3834 second_statement_index ==
EXIT_ID)
3838 const auto first_tree_node = TreeM->CGetTreeNode(first_statement_index);
3839 const auto second_tree_node = TreeM->CGetTreeNode(second_statement_index);
3841 "-->Checking if (" +
STR(second_statement_index) +
") " +
STR(second_tree_node) +
3842 " can be chained with (" +
STR(first_statement_index) +
") " +
STR(first_tree_node));
3843 auto first_store = behavioral_helper->IsStore(first_statement_index);
3849 if(not is_operation_bounded(first_statement_index))
3854 auto second_load = behavioral_helper->IsLoad(second_statement_index);
3857 first_statement_index,
3858 CanImplementSetNotEmpty(first_statement_index) ? GetFuType(first_statement_index) :
fu_binding::UNKNOWN, 0)
3860 second_load && is_one_cycle_direct_access_memory_unit(GetFuType(second_statement_index)) &&
3861 (!is_readonly_memory_unit(GetFuType(second_statement_index)) ||
3862 (!
parameters->isOption(OPT_rom_duplication) || !
parameters->getOption<
bool>(OPT_rom_duplication))) &&
3863 ((Rmem->get_maximum_references(is_memory_unit(GetFuType(second_statement_index)) ?
3864 get_memory_var(GetFuType(second_statement_index)) :
3865 get_proxy_memory_var(GetFuType(second_statement_index)))) >
3866 get_number_channels(GetFuType(second_statement_index))))
3871 auto first_type = first_tree_node->get_kind();
3872 auto second_store = behavioral_helper->IsStore(second_statement_index);
3874 if((first_type == gimple_cond_K || first_type == gimple_multi_way_if_K) && second_store)
3878 "<--No because stores cannot be executed in the same clock cycle of the condition which controls it");
3882 if((first_type == gimple_cond_K || first_type == gimple_multi_way_if_K) &&
3883 !is_operation_bounded(second_statement_index))
3886 "<--No because unbounded operations cannot be executed in the same clock cycle of the condition " 3887 "which controls it");
3891 if((first_type == gimple_cond_K || first_type == gimple_multi_way_if_K) &&
3892 (second_tree_node->get_kind() == gimple_label_K))
3895 "<--No because labels and nops cannot be executed in the same clock cycle of the condition which " 3900 if((first_type == gimple_cond_K || first_type == gimple_multi_way_if_K) &&
3901 (GetPointer<const gimple_node>(second_tree_node)->vdef))
3904 "<--No because operations with side effect cannot be executed in the same clock cycle of the " 3905 "condition which controls it");
3908 if(first_store && !(!is_operation_bounded(second_statement_index)) &&
3909 is_operation_PI_registered(second_statement_index, GetFuType(second_statement_index)))
3915 if(
parameters->IsParameter(
"bus-no-chain") &&
parameters->GetParameter<
int>(
"bus-no-chain") == 1 &&
3916 ((CanImplementSetNotEmpty(first_statement_index) &&
3917 is_indirect_access_memory_unit(GetFuType(first_statement_index))) ||
3918 (CanImplementSetNotEmpty(second_statement_index) &&
3919 is_indirect_access_memory_unit(GetFuType(second_statement_index)))))
3922 "<--No because one of the operations is an access through bus");
3925 if(
parameters->IsParameter(
"load-store-no-chain") &&
parameters->GetParameter<
int>(
"load-store-no-chain") == 1 &&
3926 (behavioral_helper->IsLoad(first_statement_index) || second_load || first_store || second_store))
3929 "<--No because one of the operations is a load or a store");
3942 behavioral_helper = hls_manager->CGetFunctionBehavior(function_index)->CGetBehavioralHelper();
3943 Rmem = hls_manager->Rmem;
3944 TreeM = hls_manager->get_tree_manager();
3945 connection_time_ratio =
3946 HLS_D->has_parameter(
"connection_time_ratio") ? HLS_D->get_parameter<
double>(
"connection_time_ratio") : 1;
3947 controller_delay_multiplier = HLS_D->has_parameter(
"controller_delay_multiplier") ?
3948 HLS_D->get_parameter<
double>(
"controller_delay_multiplier") :
3950 setup_multiplier = HLS_D->has_parameter(
"setup_multiplier") ? HLS_D->get_parameter<
double>(
"setup_multiplier") : 1.0;
3951 time_multiplier = HLS_D->has_parameter(
"time_multiplier") ? HLS_D->get_parameter<
double>(
"time_multiplier") : 1.0;
3952 mux_time_multiplier =
3953 HLS_D->has_parameter(
"mux_time_multiplier") ? HLS_D->get_parameter<
double>(
"mux_time_multiplier") : 1.0;
3954 memory_correction_coefficient = HLS_D->has_parameter(
"memory_correction_coefficient") ?
3955 HLS_D->get_parameter<
double>(
"memory_correction_coefficient") :
3958 connection_offset =
parameters->IsParameter(
"ConnectionOffset") ?
3959 parameters->GetParameter<
double>(
"ConnectionOffset") :
3960 parameters->IsParameter(
"RelativeConnectionOffset") ?
3961 parameters->GetParameter<
double>(
"RelativeConnectionOffset") * get_setup_hold_time() :
3962 HLS_D->has_parameter(
"RelativeConnectionOffset") ?
3963 HLS_D->get_parameter<
double>(
"RelativeConnectionOffset") * get_setup_hold_time() :
3964 HLS_D->has_parameter(
"ConnectionOffset") ? HLS_D->get_parameter<
double>(
"ConnectionOffset") :
3967 output_DSP_connection_time =
3968 parameters->IsParameter(
"OutputDSPConnectionRatio") ?
3969 parameters->GetParameter<
double>(
"OutputDSPConnectionRatio") * get_setup_hold_time() :
3970 HLS_D->has_parameter(
"OutputDSPConnectionRatio") ?
3971 HLS_D->get_parameter<
double>(
"OutputDSPConnectionRatio") * get_setup_hold_time() :
3973 output_carry_connection_time =
3974 parameters->IsParameter(
"OutputCarryConnectionRatio") ?
3975 parameters->GetParameter<
double>(
"OutputCarryConnectionRatio") * get_setup_hold_time() :
3976 HLS_D->has_parameter(
"OutputCarryConnectionRatio") ?
3977 HLS_D->get_parameter<
double>(
"OutputCarryConnectionRatio") * get_setup_hold_time() :
3979 fanout_coefficient =
parameters->IsParameter(
"FanOutCoefficient") ?
3980 parameters->GetParameter<
double>(
"FanOutCoefficient") :
3982 max_fanout_size =
parameters->IsParameter(
"MaxFanOutSize") ?
parameters->GetParameter<
size_t>(
"MaxFanOutSize") :
3985 HLS_D->has_parameter(
"DSPs_margin") &&
parameters->getOption<
double>(OPT_DSP_margin_combinational) == 1.0 ?
3986 HLS_D->get_parameter<
double>(
"DSPs_margin") :
3987 parameters->getOption<
double>(OPT_DSP_margin_combinational);
3989 HLS_D->has_parameter(
"DSPs_margin_stage") &&
parameters->getOption<
double>(OPT_DSP_margin_pipelined) == 1.0 ?
3990 HLS_D->get_parameter<
double>(
"DSPs_margin_stage") :
3991 parameters->getOption<
double>(OPT_DSP_margin_pipelined);
3992 DSP_allocation_coefficient = HLS_D->has_parameter(
"DSP_allocation_coefficient") &&
3993 parameters->getOption<
double>(OPT_DSP_allocation_coefficient) == 1.0 ?
3994 HLS_D->get_parameter<
double>(
"DSP_allocation_coefficient") :
3995 parameters->getOption<
double>(OPT_DSP_allocation_coefficient);
3997 n_complex_operations = 0;
4008 HLS_C = HLS_constraintsConstRef();
4009 HLS_D = HLS_deviceConstRef();
4011 Rmem = memoryConstRef();
4015 n_complex_operations = 0;
4016 id_to_fu_names.clear();
4017 is_vertex_bounded_rel.clear();
4019 memory_units.clear();
4021 precision_map.clear();
4022 proxy_function_units.clear();
4023 proxy_memory_units.clear();
4024 proxy_wrapped_units.clear();
4025 tech_constraints.clear();
4026 node_id_to_fus.clear();
4027 fus_to_node_id.clear();
4029 memory_units_sizes.clear();
4030 vars_to_memory_units.clear();
4031 precomputed_pipeline_unit.clear();
4032 single_bool_test_cond_expr_units.clear();
4033 simple_pointer_plus_expr.clear();
4034 vars_to_memory_units.clear();
4035 precomputed_pipeline_unit.clear();
4036 single_bool_test_cond_expr_units.clear();
4037 simple_pointer_plus_expr.clear();
4039 ssa_bb_versions.clear();
4040 ssa_cond_exprs.clear();
4041 cond_expr_bb_versions.clear();
4049 if(CanImplementSetNotEmpty(statement_index) && get_DSPs(GetFuType(statement_index)) != 0.0)
4055 const auto zero_distance_operations = GetZeroDistanceOperations(statement_index);
4056 const auto statement_bb_index = GetPointer<const gimple_node>(TreeM->CGetTreeNode(statement_index))->bb_index;
4058 const auto fd = GetPointer<const function_decl>(TreeM->CGetTreeNode(function_id));
4059 const auto sl = GetPointer<const statement_list>(
GET_CONST_NODE(fd->body));
4060 const auto blocks = sl->list_of_bloc;
4061 const auto statement_bb = blocks.find(statement_bb_index)->second;
4063 const auto tn = TreeM->CGetTreeNode(statement_index);
4064 const bool is_carry = [&]() ->
bool {
4065 const auto ga = GetPointer<const gimple_assign>(tn);
4071 if(op1_kind == plus_expr_K || op1_kind == minus_expr_K || op1_kind == ternary_plus_expr_K ||
4072 op1_kind == ternary_pm_expr_K || op1_kind == ternary_mp_expr_K || op1_kind == ternary_mm_expr_K ||
4073 op1_kind == eq_expr_K || op1_kind == ne_expr_K || op1_kind == gt_expr_K || op1_kind == ge_expr_K ||
4074 op1_kind == lt_expr_K || op1_kind == le_expr_K || op1_kind == pointer_plus_expr_K)
4083 for(
const auto zero_distance_operation : zero_distance_operations)
4085 if(CanImplementSetNotEmpty(zero_distance_operation) && get_DSPs(GetFuType(zero_distance_operation)) != 0.0)
4087 const auto zero_distance_operation_bb_index =
4088 GetPointer<const gimple_node>(TreeM->CGetTreeNode(zero_distance_operation))->bb_index;
4089 auto to_dsp_register_delay =
4090 (
parameters->IsParameter(
"ToDSPRegisterDelay") ?
parameters->GetParameter<
double>(
"ToDSPRegisterDelay") :
4092 get_setup_hold_time();
4094 if(statement_bb_index != zero_distance_operation_bb_index)
4097 "---" +
STR(zero_distance_operation) +
" mapped on DSP on different BB");
4098 to_dsp_register_delay += 2 * ((
parameters->IsParameter(
"ToDSPRegisterDelay") ?
4099 parameters->GetParameter<
double>(
"ToDSPRegisterDelay") :
4101 get_setup_hold_time());
4106 "---" +
STR(zero_distance_operation) +
" mapped on DSP on same BB");
4110 to_dsp_register_delay += output_carry_connection_time;
4112 if(to_dsp_register_delay > ret)
4114 ret = to_dsp_register_delay;
4120 "---" +
STR(zero_distance_operation) +
" not mapped on DSP");
4129 const auto bb_version = hls_manager->CGetFunctionBehavior(function_index)->GetBBVersion();
4130 if(zero_distance_ops_bb_version.find(statement_index) != zero_distance_ops_bb_version.end() &&
4131 zero_distance_ops_bb_version.find(statement_index)->second == bb_version)
4134 "---Get Zero Distance Operations of " +
STR(statement_index) +
" - Using cached values");
4135 return zero_distance_ops.find(statement_index)->second;
4140 "-->Computing Zero Distance Operations of " +
STR(statement_index));
4141 zero_distance_ops[statement_index].clear();
4142 zero_distance_ops_bb_version[statement_index] = bb_version;
4145 to_be_analyzed_ops.insert(statement_index);
4146 while(to_be_analyzed_ops.size())
4148 const auto current_tn_index = *(to_be_analyzed_ops.begin());
4149 to_be_analyzed_ops.erase(to_be_analyzed_ops.begin());
4150 already_analyzed.insert(current_tn_index);
4152 "-->Considering " +
STR(TreeM->CGetTreeNode(current_tn_index)));
4153 const auto current_ga = GetPointer<const gimple_assign>(TreeM->CGetTreeNode(current_tn_index));
4159 const auto current_sn = GetPointer<const ssa_name>(
GET_CONST_NODE(current_ga->op0));
4165 if(current_tn_index != statement_index)
4167 if(GetCycleLatency(statement_index) > 1)
4174 CanImplementSetNotEmpty(current_tn_index) ? GetFuType(current_tn_index) :
fu_binding::UNKNOWN, 0)
4181 for(
const auto& use_stmt : current_sn->CGetUseStmts())
4183 const auto use_stmt_index = use_stmt.first->index;
4184 if(already_analyzed.find(use_stmt_index) != already_analyzed.end())
4194 to_be_analyzed_ops.insert(use_stmt_index);
4195 zero_distance_ops[statement_index].insert(use_stmt_index);
4198 "<--Considered " +
STR(TreeM->CGetTreeNode(current_tn_index)));
4201 "<--Computed Zero Distance Operations of " +
STR(statement_index));
4202 return zero_distance_ops[statement_index];
4208 os <<
"node_kind: " << node_kind <<
"\n";
4209 os <<
"node_kind: " << node_kind <<
"\n";
4210 for(
auto el : input_prec)
4215 for(
auto el : base128_input_nelem)
4220 for(
auto el : real_input_nelem)
4225 os <<
"output_prec: " << output_prec <<
"\n";
4226 os <<
"base128_output_nelem: " << base128_output_nelem <<
"\n";
4227 os <<
"real_output_nelem: " << real_output_nelem <<
"\n";
4228 os <<
"is_single_bool_test_cond_expr: " << (is_single_bool_test_cond_expr ?
"T" :
"F") <<
"\n";
4229 os <<
"is_simple_pointer_plus_expr: " << (is_single_bool_test_cond_expr ?
"T" :
"F") <<
"\n";
#define GET_NODE(t)
Macro used to hide implementation details when accessing a tree_node from another tree_node...
static bool is_a_struct(const tree_managerConstRef &TM, const unsigned int index)
Return if treenode index is a record.
#define MEMORY_CTRL_TYPE_DPROXY
static tree_nodeConstRef GetFormalIth(const tree_nodeConstRef &obj, unsigned int parm_index)
Return the type of the ith formal parameter in case index_obj is a call_expr.
#define DEBUG_LEVEL_VERY_PEDANTIC
extremely verbose debugging print is performed.
static bool IsUnionType(const tree_nodeConstRef &type)
Return if treenode is a union.
static bool IsComplexType(const tree_nodeConstRef &type)
Return if treenode is a complex.
Data structure representing the entire HLS information.
#define INDENT_DBG_MEX(dbgLevel, curDbgLevel, mex)
We are producing a debug version of the program, so the message is printed.
Collect information about resource area.
void Initialize() override=0
Initialize all the data structure.
static std::vector< unsigned long long > GetArrayDimensions(const tree_nodeConstRef &node)
Return the dimension of the array.
#define NUM_CST_allocation_default_output_DSP_connection_ratio
The default value for the connection ratio between the output delay of a DSP and the setup delay...
static unsigned long long AccessedMaximumBitsize(const tree_nodeConstRef &type_node, unsigned long long bitsize)
return the maximum bitsize associated with the elements accessible through type_node ...
Basic block control flow graph.
File containing functions and utilities to support the printing of debug messages.
#define PRINT_DBG_MEX(dbgLevel, curDbgLevel, mex)
We are producing a debug version of the program, so the message is printed.
static bool is_int(const tree_managerConstRef &TM, const unsigned int index)
Return true if the treenode is of integer type.
std::string ToString() const
Print this node as string in gimple format.
#define DEBUG_LEVEL_PEDANTIC
very verbose debugging print is performed.
technology_nodeRef get_fu(const std::string &fu_name, const std::string &Library) const
Return the reference to a component given its name.
This file contains the structures needed to manage a graph that will represent the state transition g...
static bool IsConstant(const tree_nodeConstRef &node)
Return if a tree node is a constant object.
const tree_nodeRef CGetTreeReindex(const unsigned int i) const
Return a tree_reindex wrapping the i-th tree_node.
#define GET_CLASS(obj)
Macro returning the actual type of an object.
Dest from_strongtype_cast(Source source)
const structural_objectRef get_circ() const
Get a reference to circ field.
const std::vector< std::string > SplitString(const std::string &input, const std::string &separators)
Function which splits a string into tokens.
static bool IsArrayEquivType(const tree_nodeConstRef &type)
Return true if treenode is an array or it is equivalent to an array (record recursively having a sing...
#define ARRAY_1D_STD_BRAM_NN_SDS_BUS
T resize_1_8_pow2(T value)
#define SF_FFDATA_CONVERTER_64_32_STD
refcount< const tree_manager > tree_managerConstRef
mathematical utility function not provided by standard libraries
#define INDENT_OUT_MEX(outLevel, curOutLevel, mex)
static std::string GetString(const enum kind k)
Given a kind, return the corresponding string.
static tree_nodeConstRef CGetElements(const tree_nodeConstRef &type)
Given an array or a vector return the element type.
#define GET_NAME(data, vertex_index)
Helper macro returning the name associated with a node.
#define MEMORY_CTRL_TYPE_D00
#define ARRAY_1D_STD_BRAM_NN_SDS
CustomOrderedMap< T, U > CustomMap
const HLS_deviceRef HLS_D
reference to the information representing the target for the synthesis
exceptions managed by PandA
#define NUM_CST_allocation_default_connection_offset
The default value for connection offset.
static const ControlStep UNKNOWN
Constant used to specify unknown control step.
Collect information about resource performance.
#define ARRAY_1D_STD_BRAM_SDS
time_infoRef time_m
class representing the timing information associated with this operation
refcount< const OpGraph > OpGraphConstRef
#define MEMORY_TYPE_SYNCHRONOUS_SDS_BUS
ControlStep get_initiation_time() const
#define LIBRARY_STD_FU
standard library where all standard HLS resources are defined
Class specification of the manager of the technology library data structures.
This class specifies the characteristic of a particular functional unit.
Data structure describing a basic block at tree level.
Base class description of data information associated with each node of a graph.
static unsigned int get_type_index(const tree_managerConstRef &TM, const unsigned int index, long long int &vec_size, bool &is_a_pointer, bool &is_a_function)
Return the treenode index of the type of index.
#define ASSIGN_SIGNED_STD
redefinition of map to manage ordered/unordered structures
std::tuple< unsigned int, unsigned int > io_binding_type
tuple set used to represent the required values or the constant default value associated with the inp...
Absolute control step: the first field is the basic block, the second field is the relative control step...
#define MEMORY_TYPE_ASYNCHRONOUS
#define MEMORY_CTRL_TYPE_PROXY
const tree_nodeConstRef CGetTreeNode(const unsigned int i) const
refcount< technology_node > technology_nodeRef
refcount definition of the class
virtual enum kind get_kind() const =0
Virtual function returning the type of the actual class.
#define THROW_WARNING(str_expr)
helper function used to throw a warning in a standard way: though it uses PRINT_DBG_MEX, the debug level used is such that the message is always printed
#define STR(s)
Macro which performs a lexical_cast to a string.
Auxiliary methods for manipulating string.
#define MEMORY_CTRL_TYPE_SPROXY
structural_managerRef top
Store the top description.
static bool IsUnsignedIntegerType(const tree_nodeConstRef &type)
Return true if the treenode is of unsigned integer type.
#define MEMORY_TYPE_SYNCHRONOUS_UNALIGNED
void update(const unsigned int name, int delta)
Function used to update the copy of the technology constraints.
const tree_nodeRef get_tree_node_const(unsigned int i) const
Return the reference to the i-th tree_node Constant version of get_tree_node.
static bool IsSignedIntegerType(const tree_nodeConstRef &type)
Return true if the treenode is of signed integer type.
unsigned int operator()(const unsigned int name) const
Required functor function used to compute the number of resources associated with the given resource...
const OpNodeInfoConstRef CGetOpNodeInfo(const vertex node) const
Returns the info associated with a node.
std::vector< unsigned int > tech
copy of the technology constraints
static bool IsArrayType(const tree_nodeConstRef &type)
Return true if treenode is an array.
#define THROW_UNREACHABLE(str_expr)
helper function used to specify that some points should never be reached
bool starts_with(const std::string &str, const std::string &pattern)
ScheduleRef Rsch
Store the refcounted scheduling of the operations.
double get_resource_value(value_t val) const
static unsigned long long Size(const tree_nodeConstRef &tn)
Return the size of a tree object.
#define EXIT_ID
constant used to represent tree node index of exit operation
const unsigned int function_index
The index of the function to which this IR is associated.
static bool IsBooleanType(const tree_nodeConstRef &type)
Return true if the treenode is of bool type.
This class specifies the characteristic of a particular operation working on a given functional unit...
area_infoRef area_m
This variable stores the resource information of the component.
Data structure used to store the schedule of the operations.
unsigned int get_number_of_states() const
#define NUM_CST_allocation_default_states_number_normalization
The default value used in computation of controller delay.
#define NUM_CST_allocation_default_states_number_normalization_linear_factor
The default value used in computation of controller delay when basic blocks are considered.
Class specification of the data structures used to manage technology information. ...
#define ARRAY_1D_STD_DISTRAM_SDS
double get_area_value() const
Return the nominal value for the area of the component.
const unsigned int index
Represent the index read from the raw file and the index-1 of the vector of tree_node associated to t...
redefinition of set to manage ordered/unordered structures
#define ARRAY_1D_STD_BRAM_NN
const operation_vec & get_operations() const
Return the operations that the functional unit can handle.
#define MEMORY_TYPE_SYNCHRONOUS_SDS
boost::graph_traits< graph >::vertex_descriptor vertex
vertex definition.
double get_execution_time() const
static bool IsVectorType(const tree_nodeConstRef &type)
Return true if the treenode is a vector.
#define GET_CONST_NODE(t)
Classes specification of the tree_node data structures.
static std::string NormalizeTypename(const std::string &id)
Return normalized name of types and variables.
#define THROW_ERROR(str_expr)
helper function used to throw an error in a standard way
static const unsigned int UNKNOWN
The value used to identified unknown functional unit.
Data structure definition for HLS constraints.
#define NUM_CST_allocation_default_states_number_normalization_BB
The default value used in computation of controller delay when basic blocks are considered.
#define ARRAY_1D_STD_DISTRAM_NN_SDS
This file collects some utility functions.
#define MEMORY_CTRL_TYPE_PROXYN
constants used by HLS constants
std::string id_type
Original type id of the structural object.
const structural_type_descriptorRef & get_typeRef() const
Return the type descriptor of the structural_object.
#define ASSIGN_UNSIGNED_STD
struct definition of the type node structures.
technology_nodeRef get_operation(const std::string &op_name) const
This method returns the operationRef from its name if the functional unit contains an operation of ty...
Class specification of the tree_reindex support class.
#define ENTRY_ID
constant used to represent tree node index of entry operation
static bool is_an_union(const tree_managerConstRef &TM, const unsigned int index)
Return whether the treenode index is a union.
static bool is_concat_bit_ior_expr(const tree_managerConstRef &TM, const unsigned int index)
check if a given tree node is a concatenation operation
Data structure used to store the functional-unit binding of the vertexes.
static void get_array_dimensions(const tree_managerConstRef &TM, const unsigned int index, std::vector< unsigned long long > &dims)
Return the dimension of the array.
#define WORK_LIBRARY
working library.
#define MEMORY_CTRL_TYPE_DPROXYN
#define INFINITE_UINT
UNSIGNED INT representing infinite.
#define ARRAY_1D_STD_BRAM_SDS_BUS
Class specification of the basic_block structure.
technology_managerRef get_technology_manager() const
Returns the technology manager.
#define NUM_CST_allocation_default_output_carry_connection_ratio
The default value for the connection ratio between the output delay of a carry and the setup delay...
static tree_nodeConstRef CGetType(const tree_nodeConstRef &node)
Return the treenode of the type of node.
static bool IsStructType(const tree_nodeConstRef &type)
Return true if treenode is a record.
#define NUM_CST_allocation_default_fanout_coefficent
The default value used in computation of fanout delay.
constraint functor used by get_attribute_of_fu_per_op
#define MEMORY_CTRL_TYPE_SPROXYN
Classes specification of the tree_node data structures not present in the gcc.
this class is used to manage the command-line or XML options.
void Clear() override=0
Clear all the data structure.
updatecopy_HLS_constraints_functor(const AllocationInformationRef allocation_information)
Constructor.
Class implementation of the structural_manager.
StateTransitionGraphManagerRef STG
Store the refcounted state transition graph.
Data structure that contains all information about high level synthesis process.
const HLS_managerRef hls_manager
The HLS manager.
const HLS_constraintsRef HLS_C
store the HLS constraints
This package is used by all HLS packages to manage resource constraints and characteristics.
double get_stage_period() const
std::vector< technology_nodeRef > operation_vec
Type definition of a vector of functional_unit.
#define NUM_CST_allocation_default_max_fanout_size
The default value used in computation of fanout delay.
#define DEBUG_LEVEL_VERBOSE
verbose debugging print is performed.
static bool IsPointerType(const tree_nodeConstRef &type)
Return true if treenode index is a pointer.
Data structure definition for high-level synthesis flow.
static void ComputeSsaUses(const tree_nodeRef &, TreeNodeMap< size_t > &uses)
recursively compute the references to the ssa_name variables used in a statement
void print(std::ostream &os) const
Data structure to represent memory information in high-level synthesis.
Class specification of the manager of the tree structures extracted from the raw file.
HLS specialization of generic_device.
std::string get_library(const std::string &Name) const
Return the higher priority library where the given component is stored.
static bool is_an_array(const tree_managerConstRef &TM, const unsigned int index)
Return if treenode index is an array or it is equivalent to an array (record recursively having a sin...
static bool IsRealType(const tree_nodeConstRef &type)
Return true if the treenode is of real type.
refcount< const BehavioralHelper > BehavioralHelperConstRef
static void get_array_dim_and_bitsize(const tree_managerConstRef &TM, const unsigned int index, std::vector< unsigned long long > &dims, unsigned long long &elts_bitsize)
Return the dimension of the array.
#define SF_FFDATA_CONVERTER_32_64_STD
#define THROW_ASSERT(cond, str_expr)
helper function used to check an assert and if needed to throw an error in a standard way ...