PandA-2024.02
string_manipulation.cpp
Go to the documentation of this file.
1 /*
2  *
3  * _/_/_/ _/_/ _/ _/ _/_/_/ _/_/
4  * _/ _/ _/ _/ _/_/ _/ _/ _/ _/ _/
5  * _/_/_/ _/_/_/_/ _/ _/_/ _/ _/ _/_/_/_/
6  * _/ _/ _/ _/ _/ _/ _/ _/ _/
7  * _/ _/ _/ _/ _/ _/_/_/ _/ _/
8  *
9  * ***********************************************
10  * PandA Project
11  * URL: http://panda.dei.polimi.it
12  * Politecnico di Milano - DEIB
13  * System Architectures Group
14  * ***********************************************
15  * Copyright (c) 2018-2024 Politecnico di Milano
16  *
17  * This file is part of the PandA framework.
18  *
19  * The PandA framework is free software; you can redistribute it and/or modify
20  * it under the terms of the GNU General Public License as published by
21  * the Free Software Foundation; either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * This program is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU General Public License for more details.
28  *
29  * You should have received a copy of the GNU General Public License
30  * along with this program. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
#include "string_manipulation.hpp"

#include "exceptions.hpp"
#include "panda_types.hpp"

#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <cstring>
#include <cxxabi.h>
#include <regex>
51 
/**
 * @brief Prefix with a backslash every character of a string that belongs to a given set.
 *
 * The string is modified in place. The backslash is inserted in front of the character (rather than
 * rebuilding the pair through a C string), so that also a NUL character listed in @p to_be_escaped is
 * preserved and the scan index can never step past the end of the string.
 *
 * @param ioString is the string to be escaped; it is updated in place
 * @param to_be_escaped is the set of characters which have to be preceded by a backslash
 */
void add_escape(std::string& ioString, const std::string& to_be_escaped)
{
   for(std::string::size_type pos = 0; pos < ioString.size(); ++pos)
   {
      if(to_be_escaped.find(ioString[pos]) != std::string::npos)
      {
         ioString.insert(pos, 1, '\\');
         // Step over the character just escaped: it must not be examined a second time
         ++pos;
      }
   }
}
67 
/**
 * @brief Replace the escape sequences `\\`, `\n` and `\t` with the character they denote.
 *
 * The string is modified in place. Any other backslash sequence is left untouched. A backslash which is
 * the very last character of the string has nothing to be combined with: it is kept as it is instead of
 * triggering an out-of-range access.
 *
 * @param ioString is the string to be processed; it is updated in place
 */
void remove_escaped(std::string& ioString)
{
   // A sequence needs two characters, so the scan stops one position before the end
   for(std::string::size_type pos = 0; pos + 1 < ioString.size(); ++pos)
   {
      if(ioString[pos] != '\\')
      {
         continue;
      }
      char decoded;
      switch(ioString[pos + 1])
      {
         case '\\':
            decoded = '\\';
            break;
         case 'n':
            decoded = '\n';
            break;
         case 't':
            decoded = '\t';
            break;
         default:
            // Unknown sequence: keep both characters and go on with the next position
            continue;
      }
      ioString.replace(pos, 2, 1, decoded);
   }
}
89 
90 std::string TrimSpaces(const std::string& value)
91 {
92  std::string temp;
93  std::vector<std::string> splitted = SplitString(value, " \n\t\r");
94  bool first = true;
95  for(auto& i : splitted)
96  {
97  if(!first and i.size())
98  {
99  temp += " ";
100  }
101  if(i.size())
102  {
103  temp += i;
104  first = false;
105  }
106  }
107  return temp;
108 }
/**
 * @brief Demangle a symbol name through abi::__cxa_demangle.
 *
 * @param input is the mangled name
 * @return the demangled name, or an empty string when the demangler reports a non-zero status
 */
std::string cxa_demangle(const std::string& input)
{
   int status;
   char* const demangled = abi::__cxa_demangle(input.data(), nullptr, nullptr, &status);
   // The buffer returned by the demangler is released with free() in any case
   const std::unique_ptr<char, void (*)(void*)> guard(demangled, std::free);
   if(status != 0)
   {
      return "";
   }
   return std::string(demangled);
}
115 
/**
 * @brief Replace the length-prefixed name following the first 'Z' of a mangled signature.
 *
 * The decimal number right after the first 'Z' is read as the length of the current name; number and name
 * are replaced by the length of @p new_fname followed by @p new_fname itself, while everything before and
 * after is kept.
 *
 * @param signature is the signature to be updated
 * @param new_fname is the replacement name
 * @return the updated signature, or @p new_fname alone when @p signature has no 'Z' immediately followed by
 *         a number
 */
std::string cxa_rename_mangled(const std::string& signature, const std::string& new_fname)
{
   const auto marker = signature.find('Z');
   if(marker == std::string::npos)
   {
      return new_fname;
   }

   const char* const number_begin = signature.data() + marker + 1;
   char* number_end;
   const auto old_length = std::strtoul(number_begin, &number_end, 10);
   if(number_end == number_begin)
   {
      // No length prefix after 'Z': nothing that can be rewritten
      return new_fname;
   }

   // Offset of the first character of the old name inside the signature
   const auto old_name_offset =
       static_cast<size_t>(std::distance(signature.data(), static_cast<const char*>(number_end)));
   std::string renamed = signature.substr(0, marker + 1);
   renamed += std::to_string(new_fname.size());
   renamed += new_fname;
   renamed += signature.substr(old_name_offset + old_length);
   return renamed;
}
133 
/**
 * @brief Prepend a prefix to the length-prefixed name following the first 'Z' of a mangled signature.
 *
 * The decimal number right after the first 'Z' is read as the length of the current name and is replaced
 * by that length increased by the size of @p prefix; the prefix is then inserted right before the name.
 *
 * @param signature is the signature to be updated
 * @param prefix is the text to be prepended to the name
 * @return the updated signature, or the plain concatenation of @p prefix and @p signature when
 *         @p signature has no 'Z' immediately followed by a number
 */
std::string cxa_prefix_mangled(const std::string& signature, const std::string& prefix)
{
   const auto marker = signature.find('Z');
   if(marker == std::string::npos)
   {
      return prefix + signature;
   }

   const char* const number_begin = signature.data() + marker + 1;
   char* number_end;
   const auto old_length = std::strtoul(number_begin, &number_end, 10);
   if(number_end == number_begin)
   {
      // No length prefix after 'Z': fall back to a plain textual prefix
      return prefix + signature;
   }

   // Offset of the first character of the name inside the signature
   const auto name_offset =
       static_cast<size_t>(std::distance(signature.data(), static_cast<const char*>(number_end)));
   std::string prefixed = signature.substr(0, marker + 1);
   prefixed += std::to_string(prefix.size() + old_length);
   prefixed += prefix;
   prefixed += signature.substr(name_offset);
   return prefixed;
}
150 
/**
 * @brief Turn the first character of a string into upper case, in place.
 *
 * The character is passed to toupper as unsigned char: handing it a negative plain char (any byte above
 * 0x7F where char is signed) is undefined behaviour. An empty string is returned unchanged.
 *
 * @param str is the string to be updated
 * @return a reference to @p str
 */
std::string& capitalize(std::string& str)
{
   if(!str.empty())
   {
      str[0] = static_cast<char>(toupper(static_cast<unsigned char>(str[0])));
   }
   return str;
}

/**
 * @brief Return a copy of a string with its first character turned into upper case.
 *
 * @param str is the source string; it is not modified
 * @return the capitalized copy
 */
std::string capitalize(const std::string& str)
{
   std::string copy(str);
   // `copy` is a non-const lvalue, hence this resolves to the in-place overload
   capitalize(copy);
   return copy;
}
162 
// Pattern recognizing the spelling of a fixed-point type: `ac_`/`ap_`, an optional `u`, the word `fixed`,
// and a template argument list opening with two decimal numbers, optionally followed by a word. After
// the closing '>' the match also swallows every following character which is neither a digit nor '-'.
// The FD_GROUP_* macros below are the indices of the capture groups of this pattern.
163 static const std::regex fixed_def("a[cp]_(u)?fixed<\\s*(\\d+)\\s*,\\s*(\\d+),?\\s*(\\w+)?[^>]*>[^\\d-]*");
// Group 1: the optional `u` in front of `fixed`
164 #define FD_GROUP_U 1
// Group 2: the first numeric template argument
165 #define FD_GROUP_W 2
// Group 3: the second numeric template argument
166 #define FD_GROUP_D 3
// Group 4: the optional word following the two numbers
167 #define FD_GROUP_SIGN 4
168 
169 std::string ConvertInBinary(const std::string& C_value, unsigned long long precision, const bool real_type,
170  bool unsigned_type)
171 {
172  std::string trimmed_value = C_value;
173  THROW_ASSERT(C_value != "", "Empty string for binary conversion");
174 
175  bool is_signed, is_fixed;
176  const auto ac_bw = ac_type_bitwidth(C_value, is_signed, is_fixed);
177  if(ac_bw)
178  {
179  unsigned_type = !is_signed;
180  trimmed_value = trimmed_value.substr(trimmed_value.find('>') + 1);
181  }
182 
183  if(real_type)
184  {
185  trimmed_value = convert_fp_to_string(C_value, precision);
186  }
187  else if(is_fixed)
188  {
189  std::cmatch what;
190 #if HAVE_ASSERTS
191  const auto is_match =
192 #endif
193  std::regex_search(C_value.c_str(), what, fixed_def);
194  THROW_ASSERT(is_match, "");
195  const auto w = std::stoul(
196  std::string(what[FD_GROUP_W].first, static_cast<size_t>(what[FD_GROUP_W].second - what[FD_GROUP_W].first)));
197  const auto d = std::stoul(
198  std::string(what[FD_GROUP_D].first, static_cast<size_t>(what[FD_GROUP_D].second - what[FD_GROUP_D].first)));
199  is_signed = (what[FD_GROUP_U].second - what[FD_GROUP_U].first) == 0 &&
200  ((what[FD_GROUP_SIGN].second - what[FD_GROUP_SIGN].first) == 0 ||
201  strncmp(what[FD_GROUP_SIGN].first, "true", 4) == 0);
202  THROW_ASSERT(d < w, "Decimal part should be smaller then total length");
203  const long double val = strtold(what[0].second, nullptr) * powl(2, w - d);
204  // TODO: update regex to handle overflow correctly
205  auto fixp = integer_cst_t(val);
206  is_signed &= val < 0;
207  trimmed_value.clear();
208  while(trimmed_value.size() < w)
209  {
210  trimmed_value = ((fixp & 1) ? "1" : "0") + trimmed_value;
211  fixp >>= 1;
212  }
213  while(trimmed_value.size() < precision)
214  {
215  trimmed_value = (is_signed ? trimmed_value.front() : '0') + trimmed_value;
216  }
217  }
218  else
219  {
220  long long int ll_value;
221  if(trimmed_value[0] == '\"')
222  {
223  trimmed_value = trimmed_value.substr(1);
224  trimmed_value = trimmed_value.substr(0, trimmed_value.find('\"'));
225  if(trimmed_value[0] == '0' && trimmed_value[1] == 'b')
226  {
227  trimmed_value = trimmed_value.substr(2);
228  }
229  else if(trimmed_value[0] == '0' && (trimmed_value[1] == 'x' || trimmed_value[1] == 'o'))
230  {
231  bool is_hex = trimmed_value[1] == 'x';
232  std::string initial_string = trimmed_value.substr(2);
233  trimmed_value = "";
234  std::string hexTable[16] = {"0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
235  "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"};
236  std::string octTable[16] = {"000", "001", "010", "011", "100", "101", "110", "111"};
237  for(char curChar : initial_string)
238  {
239  int off = 0;
240  if(is_hex)
241  {
242  if(curChar >= '0' && curChar <= '9')
243  {
244  off = curChar - '0';
245  }
246  else if(curChar >= 'A' && curChar <= 'F')
247  {
248  off = curChar - 'A' + 10;
249  }
250  else if(curChar >= 'a' && curChar <= 'f')
251  {
252  off = curChar - 'a' + 10;
253  }
254  else
255  {
256  THROW_ERROR("unexpected char in hex string");
257  }
258  }
259  else
260  {
261  if(curChar >= '0' && curChar <= '8')
262  {
263  off = curChar - '0';
264  }
265  else
266  {
267  THROW_ERROR("unexpected char in octal string");
268  }
269  }
270  trimmed_value = trimmed_value + (is_hex ? hexTable[off] : octTable[off]);
271  }
272  }
273  else
274  {
275  THROW_ERROR("unsupported format");
276  }
277 
278  while(trimmed_value.size() < precision)
279  {
280  trimmed_value = "0" + trimmed_value;
281  }
282  while(trimmed_value.size() > precision)
283  {
284  trimmed_value = trimmed_value.substr(1);
285  }
286  return trimmed_value;
287  }
288  else if(trimmed_value[0] == '\'')
289  {
290  trimmed_value = trimmed_value.substr(1);
291  THROW_ASSERT(trimmed_value.find('\'') != std::string::npos, "unxpected case");
292  trimmed_value = trimmed_value.substr(0, trimmed_value.find('\''));
293  if(trimmed_value[0] == '\\')
294  {
295  ll_value = std::stoll(trimmed_value.substr(1));
296  }
297  else
298  {
299  ll_value = boost::lexical_cast<char>(trimmed_value);
300  }
301  }
302  else if(unsigned_type)
303  {
304  std::string::size_type sz = 0;
305  unsigned long long ull = std::stoull(trimmed_value, &sz, 0);
306  ll_value = static_cast<long long int>(ull);
307  }
308  else
309  {
310  std::string::size_type sz = 0;
311  ll_value = std::stoll(trimmed_value, &sz, 0);
312  }
313  auto ull_value = static_cast<unsigned long long int>(ll_value);
314  trimmed_value = "";
315  if(precision <= 64)
316  {
317  for(unsigned int ind = 0; ind < precision; ind++)
318  {
319  trimmed_value = trimmed_value + (((1LLU << (precision - ind - 1)) & ull_value) ? '1' : '0');
320  }
321  }
322  else
323  {
324  for(unsigned int ind = 0; ind < (precision - 64); ind++)
325  {
326  trimmed_value = trimmed_value + '0';
327  }
328  for(unsigned int ind = 0; ind < 64; ind++)
329  {
330  trimmed_value = trimmed_value + (((1LLU << (64 - ind - 1)) & ull_value) ? '1' : '0');
331  }
332  }
333  }
334  return trimmed_value;
335 }
336 
337 static const std::regex fixp_val("(\\d+\\.?\\d*)");
338 
339 std::string FixedPointReinterpret(const std::string& FP_vector, const std::string& fp_typename)
340 {
341  std::cmatch what;
342  if(std::regex_search(fp_typename.c_str(), what, fixed_def))
343  {
344  const auto w = std::stoul(
345  std::string(what[FD_GROUP_W].first, static_cast<size_t>(what[FD_GROUP_W].second - what[FD_GROUP_W].first)));
346  const auto d = std::stoul(
347  std::string(what[FD_GROUP_D].first, static_cast<size_t>(what[FD_GROUP_D].second - what[FD_GROUP_D].first)));
348  THROW_ASSERT(d < w, "Decimal part should be smaller then total length");
349  std::sregex_token_iterator fix_val_it(FP_vector.begin(), FP_vector.end(), fixp_val), end;
350  std::string new_vector = "{";
351  while(fix_val_it != end)
352  {
353  const long double val = strtold(fix_val_it->str().c_str(), nullptr) * powl(2, w - d);
354  // TODO: update regex to handle overflow correctly
355  const auto fixp = static_cast<long long>(val);
356  new_vector += "{{{" + STR(fixp) + "}}}, ";
357  ++fix_val_it;
358  }
359  new_vector.erase(new_vector.size() - 2, 2);
360  new_vector += "}";
361  return new_vector;
362  }
363  return FP_vector;
364 }
365 
366 const std::vector<std::string> SplitString(const std::string&
367 #ifndef __clang_analyzer__
368  input
369 #endif
370  ,
371  const std::string&
372 #ifndef __clang_analyzer__
373  separators
374 #endif
375 )
376 {
377  std::vector<std::string> ret_value;
378 #ifndef __clang_analyzer__
379  boost::algorithm::split(ret_value, input, boost::algorithm::is_any_of(separators));
380 #endif
381  return ret_value;
382 }
383 
/**
 * @brief Convert a real number stored in a string into the string of bits of its machine representation.
 *
 * The strings "__Inf", "-__Inf", "__Nan" and "-__Nan" select the corresponding special values; any other
 * string is parsed with strtof / strtod. The object representation is copied out with memcpy, since
 * reading a union member different from the last one written is undefined behaviour in C++.
 *
 * @param num is the textual form of the number
 * @param precision is the size in bits of the representation: 32 (float) or 64 (double)
 * @return a string of @p precision characters '0'/'1', most significant bit first
 * @throws std::string describing the problem when @p precision is neither 32 nor 64
 */
std::string convert_fp_to_string(std::string num, unsigned long long precision)
{
   unsigned long long bits = 0;
   switch(precision)
   {
      case 32:
      {
         float value;
         if(num == "__Inf")
         {
            value = 1.0f / 0.0f;
         }
         else if(num == "-__Inf")
         {
            value = -1.0f / 0.0f;
         }
         else if(num == "__Nan")
         {
            value = 0.0f / 0.0f;
         }
         else if(num == "-__Nan")
         {
            value = -(0.0f / 0.0f);
         }
         else
         {
            value = strtof(num.c_str(), nullptr);
         }
         unsigned int raw = 0;
         static_assert(sizeof(raw) == sizeof(value), "float is expected to be as wide as unsigned int");
         std::memcpy(&raw, &value, sizeof(raw));
         bits = raw;
         break;
      }
      case 64:
      {
         double value;
         if(num == "__Inf")
         {
            value = 1.0 / 0.0;
         }
         else if(num == "-__Inf")
         {
            value = -1.0 / 0.0;
         }
         else if(num == "__Nan")
         {
            value = 0.0 / 0.0;
         }
         else if(num == "-__Nan")
         {
            value = -(0.0 / 0.0);
         }
         else
         {
            value = strtod(num.c_str(), nullptr);
         }
         static_assert(sizeof(bits) == sizeof(value), "double is expected to be as wide as unsigned long long");
         std::memcpy(&bits, &value, sizeof(bits));
         break;
      }
      default:
         throw std::string("not supported precision ") + std::to_string(precision);
   }

   // Dump the bits starting from the most significant one
   std::string res;
   res.reserve(static_cast<size_t>(precision));
   for(auto bit = precision; bit-- > 0;)
   {
      res += ((bits >> bit) & 1ULL) ? '1' : '0';
   }
   return res;
}
461 
/**
 * @brief Convert a real number stored in a string into the bits of its machine representation.
 *
 * The strings "__Inf", "-__Inf", "__Nan" and "-__Nan" select the corresponding special values; any other
 * string is parsed with strtof / strtod. The object representation is copied out with memcpy, since
 * reading a union member different from the last one written is undefined behaviour in C++.
 *
 * @param num is the textual form of the number
 * @param precision is the size in bits of the representation: 32 (float) or 64 (double)
 * @return the representation, stored in the least significant @p precision bits of the result
 * @throws std::string describing the problem when @p precision is neither 32 nor 64
 */
unsigned long long convert_fp_to_bits(std::string num, unsigned long long precision)
{
   switch(precision)
   {
      case 32:
      {
         float value;
         if(num == "__Inf")
         {
            value = 1.0f / 0.0f;
         }
         else if(num == "-__Inf")
         {
            value = -1.0f / 0.0f;
         }
         else if(num == "__Nan")
         {
            value = 0.0f / 0.0f;
         }
         else if(num == "-__Nan")
         {
            value = -(0.0f / 0.0f);
         }
         else
         {
            value = strtof(num.c_str(), nullptr);
         }
         unsigned int raw = 0;
         static_assert(sizeof(raw) == sizeof(value), "float is expected to be as wide as unsigned int");
         std::memcpy(&raw, &value, sizeof(raw));
         return raw;
      }
      case 64:
      {
         double value;
         if(num == "__Inf")
         {
            value = 1.0 / 0.0;
         }
         else if(num == "-__Inf")
         {
            value = -1.0 / 0.0;
         }
         else if(num == "__Nan")
         {
            value = 0.0 / 0.0;
         }
         else if(num == "-__Nan")
         {
            value = -(0.0 / 0.0);
         }
         else
         {
            value = strtod(num.c_str(), nullptr);
         }
         unsigned long long raw = 0;
         static_assert(sizeof(raw) == sizeof(value), "double is expected to be as wide as unsigned long long");
         std::memcpy(&raw, &value, sizeof(raw));
         return raw;
      }
      default:
         throw std::string("not supported precision ") + std::to_string(precision);
   }
}
528 
// Pattern recognizing the spelling of an ac_/ap_ type: an optional `u`, the kind of the type, and a
// template argument list made of a number, an optional second number and an optional word.
static const std::regex ac_type_def("a[cp]_(u)?(\\w+)<\\s*(\\d+)\\s*,?\\s*(\\d+)?,?\\s*(\\w+)?[^>]*>");
// Group 1: the optional `u` prefix of the kind
#define AC_GROUP_U 1
// Group 2: the kind of the type (the word in front of '<')
#define AC_GROUP_T 2
// Group 3: the first numeric template argument, returned as bitwidth
#define AC_GROUP_W 3
// Group 4: the optional second numeric template argument
#define AC_GROUP_NUM 4
// Group 5: the optional word following the numeric arguments, holding the signedness flag. It used to be
// read from group 4, which can only hold digits and so could never be equal to "true"
#define AC_GROUP_SIGN 5

/**
 * @brief Extract bitwidth, signedness and kind from the spelling of an ac_/ap_ type.
 *
 * The type is considered signed when its kind has no `u` prefix and its signedness flag is either
 * missing or starts with "true". The flag is the word following the numeric arguments; when there is no
 * such word and the type is not a fixed-point one, the second numeric argument is used in its place, so
 * that a flag written as a number keeps being treated as before.
 *
 * @param intType is the text holding the type spelling
 * @param is_signed is set to true when a signed type is recognized, to false otherwise
 * @param is_fixed is set to true when the recognized kind contains the word "fixed", to false otherwise
 * @return the first numeric template argument of the type, or 0 when no ac_/ap_ type is found
 */
unsigned long long ac_type_bitwidth(const std::string& intType, bool& is_signed, bool& is_fixed)
{
   is_signed = false;
   is_fixed = false;

   std::cmatch what;
   if(!std::regex_search(intType.c_str(), what, ac_type_def))
   {
      return 0;
   }

   const auto w = std::stoull(what[AC_GROUP_W].str());
   is_fixed = what[AC_GROUP_T].str().find("fixed") != std::string::npos;

   // An unmatched group yields an empty string
   std::string sign_flag = what[AC_GROUP_SIGN].str();
   if(sign_flag.empty() && !is_fixed)
   {
      // For fixed-point types the second number is a width, not a flag, so it is never used here
      sign_flag = what[AC_GROUP_NUM].str();
   }
   const bool has_u_prefix = what[AC_GROUP_U].length() != 0;
   is_signed = !has_u_prefix && (sign_flag.empty() || sign_flag.compare(0, 4, "true") == 0);
   return w;
}
std::string convert_fp_to_string(std::string num, unsigned long long precision)
convert a real number stored in a string into a string of bits with a given precision ...
#define AC_GROUP_SIGN
struct definition of the real_type tree node.
Definition: tree_node.hpp:4039
void remove_escaped(std::string &ioString)
Function converting all the escaped characters in the associated character.
std::string capitalize(const std::string &str)
static const std::regex ac_type_def("a[cp]_(u)?(\\w+)<\\s*(\\d+)\\s*,?\\s*(\\d+)?,?\\s*(\\w+)?[^>]*>")
const std::vector< std::string > SplitString(const std::string &input, const std::string &separators)
Function which splits a string into tokens.
int input[SIZE]
Definition: hash.h:1
exceptions managed by PandA
#define FD_GROUP_U
std::string ConvertInBinary(const std::string &C_value, unsigned long long precision, const bool real_type, bool unsigned_type)
Convert a string storing a number in decimal format into a string in binary format.
static const std::regex fixp_val("(\\d+\\.?\\d*)")
void add_escape(std::string &ioString, const std::string &to_be_escaped)
Header include.
#define STR(s)
Macro which performs a lexical_cast to a string.
Auxiliary methods for manipulating string.
#define AC_GROUP_W
APInt integer_cst_t
Definition: panda_types.hpp:47
#define AC_GROUP_U
std::string cxa_rename_mangled(const std::string &signature, const std::string &new_fname)
#define THROW_ERROR(str_expr)
helper function used to throw an error in a standard way
Definition: exceptions.hpp:263
#define AC_GROUP_T
std::string TrimSpaces(const std::string &value)
static const std::regex fixed_def("a[cp]_(u)?fixed<\\s*(\\d+)\\s*,\\s*(\\d+),?\\s*(\\w+)?[^>]*>[^\\d-]*")
TYPE distance(TYPE position_x[nAtoms], TYPE position_y[nAtoms], TYPE position_z[nAtoms], int i, int j)
Definition: md_kernel_test.c:3
char str[25]
Definition: fixedptc.c:8
unsigned long long ac_type_bitwidth(const std::string &intType, bool &is_signed, bool &is_fixed)
#define FD_GROUP_D
std::string FixedPointReinterpret(const std::string &FP_vector, const std::string &fp_typename)
#define FD_GROUP_W
std::string cxa_prefix_mangled(const std::string &signature, const std::string &prefix)
std::string cxa_demangle(const std::string &input)
#define FD_GROUP_SIGN
unsigned long long convert_fp_to_bits(std::string num, unsigned long long precision)
convert a real number stored in a string into bits with a given precision
#define THROW_ASSERT(cond, str_expr)
helper function used to check an assert and if needed to throw an error in a standard way ...
Definition: exceptions.hpp:289

Generated on Mon Feb 12 2024 13:02:56 for PandA-2024.02 by doxygen 1.8.13