cprover
string_builtin_function.cpp
Go to the documentation of this file.
1 
5 
6 #include <algorithm>
7 #include <iterator>
8 
10 
13  const exprt &return_code,
14  const std::vector<exprt> &fun_args,
15  array_poolt &array_pool)
16  : string_builtin_functiont(return_code, array_pool)
17 {
18  PRECONDITION(fun_args.size() > 2);
19  const auto arg1 = expr_checked_cast<struct_exprt>(fun_args[2]);
20  input = array_pool.find(arg1.op1(), arg1.op0());
21  result = array_pool.find(fun_args[1], fun_args[0]);
22 }
23 
25  const array_string_exprt &a,
26  const std::function<exprt(const exprt &)> &get_value)
27 {
28  if(a.id() == ID_if)
29  {
30  const if_exprt &ite = to_if_expr(a);
31  const exprt cond = get_value(ite.cond());
32  if(!cond.is_constant())
33  return {};
34  return cond.is_true()
35  ? eval_string(to_array_string_expr(ite.true_case()), get_value)
36  : eval_string(to_array_string_expr(ite.false_case()), get_value);
37  }
38 
39  const exprt content = get_value(a.content());
40  const auto &array = expr_try_dynamic_cast<array_exprt>(content);
41  if(!array)
42  return {};
43 
44  const auto &ops = array->operands();
45  std::vector<mp_integer> result;
46  const mp_integer unknown('?');
47  const auto &insert = std::back_inserter(result);
49  ops.begin(), ops.end(), insert, [unknown](const exprt &e) { // NOLINT
50  if(const auto i = numeric_cast<mp_integer>(e))
51  return *i;
52  return unknown;
53  });
54  return result;
55 }
56 
57 template <typename Iter>
58 static array_string_exprt
59 make_string(Iter begin, Iter end, const array_typet &array_type)
60 {
61  const typet &char_type = array_type.element_type();
62  array_exprt array_expr({}, array_type);
63  const auto &insert = std::back_inserter(array_expr.operands());
64  std::transform(begin, end, insert, [&](const mp_integer &i) {
65  return from_integer(i, char_type);
66  });
67  return to_array_string_expr(array_expr);
68 }
69 
71 make_string(const std::vector<mp_integer> &array, const array_typet &array_type)
72 {
73  return make_string(array.begin(), array.end(), array_type);
74 }
75 
77  const std::function<exprt(const exprt &)> &get_value) const
78 {
79  auto input_opt = eval_string(input, get_value);
80  if(!input_opt.has_value())
81  return {};
82  const mp_integer char_val = [&] {
83  if(const auto val = numeric_cast<mp_integer>(get_value(character)))
84  return *val;
85  INVARIANT(
86  get_value(character).id() == ID_unknown,
87  "character valuation should only contain constants and unknown");
89  }();
90  input_opt.value().push_back(char_val);
91  const auto length =
92  from_integer(input_opt.value().size(), result.length_type());
93  const array_typet type(to_type_with_subtype(result.type()).subtype(), length);
94  return make_string(input_opt.value(), type);
95 }
96 
104  string_constraint_generatort &generator) const
105 {
108  constraints.universal.push_back([&] {
109  const symbol_exprt idx =
110  generator.fresh_symbol("QA_index_concat_char", result.length_type());
111  const exprt upper_bound =
113  return string_constraintt(
114  idx, upper_bound, equal_exprt(input[idx], result[idx]));
115  }());
116  constraints.existential.push_back(
118  constraints.existential.push_back(
120  return constraints;
121 }
122 
124 {
126 }
127 
129  const std::function<exprt(const exprt &)> &get_value) const
130 {
131  auto input_opt = eval_string(input, get_value);
132  const auto char_opt = numeric_cast<mp_integer>(get_value(character));
133  const auto position_opt = numeric_cast<mp_integer>(get_value(position));
134  if(!input_opt || !char_opt || !position_opt)
135  return {};
136  if(0 <= *position_opt && *position_opt < input_opt.value().size())
137  input_opt.value()[numeric_cast_v<std::size_t>(*position_opt)] = *char_opt;
138  const auto length =
139  from_integer(input_opt.value().size(), result.length_type());
140  const array_typet type(to_type_with_subtype(result.type()).subtype(), length);
141  return make_string(input_opt.value(), type);
142 }
143 
154  string_constraint_generatort &generator) const
155 {
159  and_exprt(
164  constraints.universal.push_back([&] {
165  const symbol_exprt q =
166  generator.fresh_symbol("QA_char_set", position.type());
167  const equal_exprt a3_body(result[q], input[q]);
168  return string_constraintt(
169  q,
172  a3_body);
173  }());
174  constraints.universal.push_back([&] {
175  const symbol_exprt q2 =
176  generator.fresh_symbol("QA_char_set2", position.type());
177  const equal_exprt a4_body(result[q2], input[q2]);
178  return string_constraintt(
179  q2,
182  a4_body);
183  }());
184  return constraints;
185 }
186 
188 {
189  const exprt out_of_bounds = or_exprt(
193  position, ID_lt, from_integer(0, input.length_type())));
194  const exprt return_value = if_exprt(
195  out_of_bounds,
198  return and_exprt(
199  equal_exprt(
202  equal_exprt(return_code, return_value));
203 }
204 
205 static bool eval_is_upper_case(const mp_integer &c)
206 {
207  // Characters between 'A' and 'Z' are upper-case
208  // Characters between 0xc0 (latin capital A with grave)
209  // and 0xd6 (latin capital O with diaeresis) are upper-case
210  // Characters between 0xd8 (latin capital O with stroke)
211  // and 0xde (latin capital thorn) are upper-case
212  return ('A' <= c && c <= 'Z') || (0xc0 <= c && c <= 0xd6) ||
213  (0xd8 <= c && c <= 0xde);
214 }
215 
217  const std::function<exprt(const exprt &)> &get_value) const
218 {
219  auto input_opt = eval_string(input, get_value);
220  if(!input_opt)
221  return {};
222  for(mp_integer &c : input_opt.value())
223  {
224  if(eval_is_upper_case(c))
225  c += 0x20;
226  }
227  const auto length =
228  from_integer(input_opt.value().size(), result.length_type());
229  const array_typet type(to_type_with_subtype(result.type()).subtype(), length);
230  return make_string(input_opt.value(), type);
231 }
232 
235 static exprt is_upper_case(const exprt &character)
236 {
237  const exprt char_A = from_integer('A', character.type());
238  const exprt char_Z = from_integer('Z', character.type());
239  exprt::operandst upper_case;
240  // Characters between 'A' and 'Z' are upper-case
241  upper_case.push_back(and_exprt(
242  binary_relation_exprt(char_A, ID_le, character),
243  binary_relation_exprt(character, ID_le, char_Z)));
244 
245  // Characters between 0xc0 (latin capital A with grave)
246  // and 0xd6 (latin capital O with diaeresis) are upper-case
247  upper_case.push_back(and_exprt(
249  from_integer(0xc0, character.type()), ID_le, character),
251  character, ID_le, from_integer(0xd6, character.type()))));
252 
253  // Characters between 0xd8 (latin capital O with stroke)
254  // and 0xde (latin capital thorn) are upper-case
255  upper_case.push_back(and_exprt(
257  from_integer(0xd8, character.type()), ID_le, character),
259  character, ID_le, from_integer(0xde, character.type()))));
260  return disjunction(upper_case);
261 }
262 
265 static exprt is_lower_case(const exprt &character)
266 {
267  return is_upper_case(
268  minus_exprt(character, from_integer(0x20, character.type())));
269 }
270 
282  string_constraint_generatort &generator) const
283 {
284  // \todo for now, only characters in Basic Latin and Latin-1 supplement
285  // are supported (up to 0x100), we should add others using case mapping
286  // information from the UnicodeData file.
289  constraints.universal.push_back([&] {
290  const symbol_exprt idx =
291  generator.fresh_symbol("QA_lower_case", result.length_type());
292  const exprt conditional_convert = [&] {
293  // The difference between upper-case and lower-case for the basic
294  // latin and latin-1 supplement is 0x20.
295  const typet &char_type = to_type_with_subtype(result.type()).subtype();
296  const exprt diff = from_integer(0x20, char_type);
297  const exprt converted =
298  equal_exprt(result[idx], plus_exprt(input[idx], diff));
299  const exprt non_converted = equal_exprt(result[idx], input[idx]);
300  return if_exprt(is_upper_case(input[idx]), converted, non_converted);
301  }();
302  return string_constraintt(
303  idx,
305  conditional_convert);
306  }());
307  return constraints;
308 }
309 
311  const std::function<exprt(const exprt &)> &get_value) const
312 {
313  auto input_opt = eval_string(input, get_value);
314  if(!input_opt)
315  return {};
316  for(mp_integer &c : input_opt.value())
317  {
318  if(eval_is_upper_case(c - 0x20))
319  c -= 0x20;
320  }
321  const auto length =
322  from_integer(input_opt.value().size(), result.length_type());
323  const array_typet type(to_type_with_subtype(result.type()).subtype(), length);
324  return make_string(input_opt.value(), type);
325 }
326 
337  symbol_generatort &fresh_symbol) const
338 {
341  constraints.universal.push_back([&] {
342  const symbol_exprt idx =
343  fresh_symbol("QA_upper_case", result.length_type());
344  const typet &char_type =
346  const exprt converted =
347  minus_exprt(input[idx], from_integer(0x20, char_type));
348  return string_constraintt(
349  idx,
351  equal_exprt(
352  result[idx],
353  if_exprt(is_lower_case(input[idx]), converted, input[idx])));
354  }());
355  return constraints;
356 }
357 
364  const exprt &return_code,
365  const std::vector<exprt> &fun_args,
366  array_poolt &array_pool)
367  : string_builtin_functiont(return_code, array_pool)
368 {
369  PRECONDITION(fun_args.size() >= 3);
370  result = array_pool.find(fun_args[1], fun_args[0]);
371  arg = fun_args[2];
372 }
373 
375  const std::function<exprt(const exprt &)> &get_value) const
376 {
377  const auto arg_value = numeric_cast<mp_integer>(get_value(arg));
378  if(!arg_value)
379  return {};
380  static std::string digits = "0123456789abcdefghijklmnopqrstuvwxyz";
381  const auto radix_value = numeric_cast<mp_integer>(get_value(radix));
382  if(!radix_value || *radix_value > digits.length())
383  return {};
384 
385  mp_integer current_value = *arg_value;
386  std::vector<mp_integer> right_to_left_characters;
387  if(current_value < 0)
388  current_value = -current_value;
389  if(current_value == 0)
390  right_to_left_characters.emplace_back('0');
391  while(current_value > 0)
392  {
393  const auto digit_value = (current_value % *radix_value).to_ulong();
394  right_to_left_characters.emplace_back(digits.at(digit_value));
395  current_value /= *radix_value;
396  }
397  if(*arg_value < 0)
398  right_to_left_characters.emplace_back('-');
399 
400  const auto length = right_to_left_characters.size();
401  const auto length_expr = from_integer(length, result.length_type());
402  const array_typet type(
403  to_type_with_subtype(result.type()).subtype(), length_expr);
404  return make_string(
405  right_to_left_characters.rbegin(), right_to_left_characters.rend(), type);
406 }
407 
409  string_constraint_generatort &generator) const
410 {
411  auto pair =
413  pair.second.existential.push_back(equal_exprt(pair.first, return_code));
414  return pair.second;
415 }
416 
418 {
419  const typet &type = result.length_type();
420  const auto radix_opt = numeric_cast<std::size_t>(radix);
421  const auto radix_value = radix_opt.has_value() ? radix_opt.value() : 2;
422  const std::size_t upper_bound =
423  max_printed_string_length(arg.type(), radix_value);
424  const exprt negative_arg =
425  binary_relation_exprt(arg, ID_le, from_integer(0, type));
426  const exprt absolute_arg =
427  if_exprt(negative_arg, unary_minus_exprt(arg), arg);
428 
429  exprt size_expr = from_integer(1, type);
430  exprt min_int_with_current_size = from_integer(1, radix.type());
431  for(std::size_t current_size = 2; current_size <= upper_bound + 1;
432  ++current_size)
433  {
434  min_int_with_current_size = mult_exprt(radix, min_int_with_current_size);
435  const exprt at_least_current_size =
436  binary_relation_exprt(absolute_arg, ID_ge, min_int_with_current_size);
437  size_expr = if_exprt(
438  at_least_current_size, from_integer(current_size, type), size_expr);
439  }
440 
441  const exprt size_expr_with_sign = if_exprt(
442  negative_arg, plus_exprt(size_expr, from_integer(1, type)), size_expr);
443  return equal_exprt(
444  array_pool.get_or_create_length(result), size_expr_with_sign);
445 }
446 
448  const exprt &return_code,
450  array_poolt &array_pool)
451  : string_builtin_functiont(return_code, array_pool), function_application(f)
452 {
453  const std::vector<exprt> &fun_args = f.arguments();
454  std::size_t i = 0;
455  if(fun_args.size() >= 2 && fun_args[1].type().id() == ID_pointer)
456  {
457  string_res = array_pool.find(fun_args[1], fun_args[0]);
458  i = 2;
459  }
460 
461  for(; i < fun_args.size(); ++i)
462  {
463  const auto arg = expr_try_dynamic_cast<struct_exprt>(fun_args[i]);
464  // TODO: use is_refined_string_type ?
465  if(
466  arg && arg->operands().size() == 2 &&
467  arg->op1().type().id() == ID_pointer)
468  {
469  INVARIANT(is_refined_string_type(arg->type()), "should be a string");
470  string_args.push_back(array_pool.find(arg->op1(), arg->op0()));
471  }
472  else
473  args.push_back(fun_args[i]);
474  }
475 }
476 
478  string_constraint_generatort &generator) const
479 {
480  auto pair =
482  pair.second.existential.push_back(equal_exprt(pair.first, return_code));
483  return pair.second;
484 }
static abstract_object_pointert transform(const exprt &expr, const std::vector< abstract_object_pointert > &operands, const abstract_environmentt &environment, const namespacet &ns)
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
bitvector_typet char_type()
Definition: c_types.cpp:124
Boolean AND.
Definition: std_expr.h:1974
Array constructor from list of elements.
Definition: std_expr.h:1476
Correspondance between arrays and pointers string representations.
Definition: array_pool.h:42
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
Definition: array_pool.cpp:26
array_string_exprt find(const exprt &pointer, const exprt &length)
Creates a new array if the pointer is not pointing to an array.
Definition: array_pool.cpp:184
const typet & length_type() const
Definition: string_expr.h:69
exprt & content()
Definition: string_expr.h:74
Arrays with given size.
Definition: std_types.h:763
const typet & element_type() const
The type of the elements of the array.
Definition: std_types.h:777
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:674
Equality.
Definition: std_expr.h:1225
Base class for all expressions.
Definition: expr.h:54
std::vector< exprt > operandst
Definition: expr.h:56
bool is_true() const
Return whether the expression is a constant representing true.
Definition: expr.cpp:33
typet & type()
Return the type of the expression.
Definition: expr.h:82
bool is_constant() const
Return whether the expression is a constant.
Definition: expr.cpp:26
Application of (mathematical) function.
The trinary if-then-else operator.
Definition: std_expr.h:2226
exprt & true_case()
Definition: std_expr.h:2253
exprt & false_case()
Definition: std_expr.h:2263
exprt & cond()
Definition: std_expr.h:2243
Boolean implication.
Definition: std_expr.h:2037
const irep_idt & id() const
Definition: irep.h:396
Binary minus.
Definition: std_expr.h:973
Binary multiplication Associativity is not specified.
Definition: std_expr.h:1019
Boolean OR.
Definition: std_expr.h:2082
The plus expression Associativity is not specified.
Definition: std_expr.h:914
std::vector< array_string_exprt > string_args
optionalt< array_string_exprt > string_res
string_builtin_function_with_no_evalt(const exprt &return_code, const function_application_exprt &f, array_poolt &array_pool)
function_application_exprt function_application
string_constraintst constraints(string_constraint_generatort &generator) const override
Add constraints ensuring that the value of result expression of the builtin function corresponds to t...
Base class for string functions that are built in the solver.
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
string_constraintst constraints(string_constraint_generatort &generator) const override
Set of constraints enforcing that result is the concatenation of input with character.
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
std::pair< exprt, string_constraintst > add_axioms_for_string_of_int_with_radix(const array_string_exprt &res, const exprt &input_int, const exprt &radix, size_t max_size)
Add axioms enforcing that the string corresponds to the result of String.valueOf(II) or String....
std::pair< exprt, string_constraintst > add_axioms_for_function_application(const function_application_exprt &expr)
strings contained in this call are converted to objects of type string_exprt, through adding axioms.
string_creation_builtin_functiont(const exprt &return_code, const std::vector< exprt > &fun_args, array_poolt &array_pool)
Constructor from arguments of a function application.
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
string_constraintst constraints(string_constraint_generatort &generator) const override
Add constraints ensuring that the value of result expression of the builtin function corresponds to t...
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
string_constraintst constraints(string_constraint_generatort &generator) const override
Set of constraints ensuring that result is similar to input where the character at index position is ...
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
string_constraintst constraints(string_constraint_generatort &generator) const override
Set of constraints ensuring result corresponds to input in which uppercase characters have been conve...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
string_constraintst constraints(class symbol_generatort &fresh_symbol) const
Set of constraints ensuring result corresponds to input in which lowercase characters of Basic Latin ...
optionalt< exprt > eval(const std::function< exprt(const exprt &)> &get_value) const override
Given a function get_value which gives a valuation to expressions, attempt to find the result of the ...
exprt length_constraint() const override
Constraint ensuring that the length of the strings are coherent with the function call.
string_transformation_builtin_functiont(exprt return_code, array_string_exprt result, array_string_exprt input, array_poolt &array_pool)
Expression to hold a symbol (variable)
Definition: std_expr.h:80
Generation of fresh symbols of a given type.
Definition: array_pool.h:22
const typet & subtype() const
Definition: type.h:156
The type of an expression, extends irept.
Definition: type.h:29
The unary minus expression.
Definition: std_expr.h:390
nonstd::optional< T > optionalt
Definition: optional.h:35
bool is_refined_string_type(const typet &type)
BigInt mp_integer
Definition: smt_terms.h:12
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:22
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:2291
static exprt is_lower_case(const exprt &character)
Expression which is true for lower_case characters of the Basic Latin and Latin-1 supplement of unico...
static array_string_exprt make_string(Iter begin, Iter end, const array_typet &array_type)
static exprt is_upper_case(const exprt &character)
Expression which is true for uppercase characters of the Basic Latin and Latin-1 supplement of unicod...
static bool eval_is_upper_case(const mp_integer &c)
optionalt< std::vector< mp_integer > > eval_string(const array_string_exprt &a, const std::function< exprt(const exprt &)> &get_value)
Given a function get_value which gives a valuation to expressions, attempt to find the current value ...
#define CHARACTER_FOR_UNKNOWN
Module: String solver Author: Diffblue Ltd.
exprt length_constraint_for_concat_char(const array_string_exprt &res, const array_string_exprt &s1, array_poolt &array_pool)
Add axioms enforcing that the length of res is that of the concatenation of s1 with.
Generates string constraints to link results from string functions with their arguments.
exprt minimum(const exprt &a, const exprt &b)
exprt zero_if_negative(const exprt &expr)
Returns a non-negative version of the argument.
size_t max_printed_string_length(const typet &type, unsigned long ul_radix)
Calculate the string length needed to represent any value of the given type using the given radix.
array_string_exprt & to_array_string_expr(exprt &expr)
Definition: string_expr.h:95
Collection of constraints of different types: existential formulas, universal formulas,...
std::vector< exprt > existential
std::vector< string_constraintt > universal
const type_with_subtypet & to_type_with_subtype(const typet &type)
Definition: type.h:177