36 auto original_goto_model =
39 if(!original_goto_model.has_value())
41 log.
error() <<
"Unable to read goto binary for linker script merging"
47 std::list<irep_idt> linker_defined_symbols;
49 linker_defined_symbols,
50 original_goto_model->symbol_table,
52 linker_def_outfile());
77 if(pair == original_goto_model->goto_functions.function_map.end())
87 original_goto_model->symbol_table,
107 log.
error() <<
"Could not pointerize all linker-defined expressions"
114 *original_goto_model,
120 log.
error() <<
"Could not write linkerscript-augmented binary"
128 const std::string &elf_binary,
129 const std::string &goto_binary,
132 : elf_binary(elf_binary),
133 goto_binary(goto_binary),
135 log(message_handler),
136 replacement_predicates(
138 "address of array's first member",
143 [](
const exprt &expr) {
144 return expr.
id() == ID_address_of &&
145 expr.
type().
id() == ID_pointer &&
153 .
id() == ID_symbol &&
161 .
id() == ID_constant &&
165 .
id() == ID_signedbv;
172 [](
const exprt &expr) {
173 return expr.
id() == ID_address_of &&
174 expr.
type().
id() == ID_pointer &&
184 [](
const exprt &expr) {
185 return expr.
id() == ID_address_of &&
186 expr.
type().
id() == ID_pointer &&
198 [](
const exprt &expr) {
199 return expr.
id() == ID_symbol && expr.
type().
id() == ID_array;
202 "pointer (does not need pointerizing)",
206 [](
const exprt &expr) {
207 return expr.
id() == ID_symbol && expr.
type().
id() == ID_pointer;
219 for(
const auto &pair : linker_values)
227 entry.
value=pair.second.second;
234 std::list<symbol_exprt> to_pointerize;
237 if(to_pointerize.empty())
239 log.
debug() <<
"Pointerizing the symbol-table value of symbol "
245 if(to_pointerize.empty() && fail==0)
248 for(
const auto &sym : to_pointerize)
250 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
251 <<
"' in symbol table entry " << pair.first <<
". Pretty:\n"
252 << sym.pretty() <<
"\n";
265 std::list<exprt *>({&(instruction.code), &(instruction.guard)}))
267 std::list<symbol_exprt> to_pointerize;
269 if(to_pointerize.empty())
273 if(to_pointerize.empty() && fail==0)
276 for(
const auto &sym : to_pointerize)
278 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
279 <<
"' in function " << gf.first <<
". Pretty:\n"
280 << sym.pretty() <<
"\n";
295 const std::string &shape)
297 auto it=linker_values.find(ident);
298 if(it==linker_values.end())
300 log.
error() <<
"Could not find a new expression for linker script-defined "
306 log.
debug() <<
"Pointerizing linker-defined symbol '" << ident
314 std::list<symbol_exprt> &to_pointerize,
318 for(
auto const &pair : linker_values)
321 if(!pattern.match(expr))
324 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
327 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
328 pattern.description());
330 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
332 if(result==to_pointerize.end())
339 to_pointerize.erase(result);
358 std::list<symbol_exprt> &to_pointerize)
const
360 for(
const auto &op : expr.
operands())
362 if(op.id()!=ID_symbol)
366 if(pair!=linker_values.end())
367 to_pointerize.push_back(sym_exp);
369 for(
const auto &op : expr.
operands())
374 The current implementation of
this function is less precise than the
375 commented-out version below. To understand the difference between these
376 implementations, consider the following example:
378 Suppose we have a section in the linker script, 100 bytes long, where the
379 address of the symbol sec_start is the start of the section (value 4096) and the
380 address of sec_end is the end of that section (value 4196).
382 The current implementation synthesizes the
goto-version of the following C:
384 char __sec_array[100];
385 char *sec_start=(&__sec_array[0]);
386 char *sec_end=((&__sec_array[0])+100);
390 This is imprecise
for the following reason: the actual address of the array and
391 the pointers shall be some random CBMC-
internal address, instead of being 4096
392 and 4196. The linker script, on the other hand, would have specified the exact
393 position of the section, and we even know what the actual values of sec_start
394 and sec_end are from the
object file (these values are in the `addresses` list
395 of the `
data` argument to
this function). If the correctness of the code depends
396 on these actual values, then CBMCs model of the code is too imprecise to verify
399 The commented-out version of
this function below synthesizes the following:
401 char *sec_start=4096;
405 This code has both the actual addresses of the start and end of the section and
406 tells CBMC that the intermediate region is valid. However, the allocated_memory
407 macro does not currently allocate an actual
object at the address 4096, so
408 symbolic execution can fail. In particular, the
'allocated memory' is part of
409 __CPROVER_memory, which does not have a bounded size;
this means that (
for
410 example) calls to memcpy or memset fail, because the first and third arguments
411 do not have know n size. The commented-out implementation should be reinstated
414 In either
case, no other changes to the rest of the code (outside
this function)
415 should be necessary. The rest of
this file converts expressions containing the
416 linker-defined symbol into pointer types
if they were not already, and
this is
417 the right behaviour
for both implementations.
421 const std::string &linker_script,
428 std::map<irep_idt, std::size_t> truncated_symbols;
431 bool has_end=d[
"has-end-symbol"].is_true();
435 << d[
"start-symbol"].value;
444 log.warning() <<
"Object section '" << d[
"section"].value <<
"' of size "
445 << array_size <<
" is too large to model. Truncating to "
458 std::ostringstream array_comment;
459 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size "
460 << array_size <<
" bytes";
470 linker_values.emplace(
471 d[
"start-symbol"].value, std::make_pair(start_sym, array_start));
475 auto it = std::find_if(
478 [&d](
const jsont &add) {
479 return add[
"sym"].
value == d[
"start-symbol"].value;
483 log.error() <<
"Start: Could not find address corresponding to symbol '"
484 << d[
"start-symbol"].value <<
"' (start of section)"
490 std::ostringstream start_comment;
491 start_comment <<
"Pointer to beginning of object section '"
492 << d[
"section"].value <<
"'. Original address in object file"
493 <<
" is " << (*it)[
"val"].value;
502 initialize_instructions.push_front(start_assign_i);
506 plus_exprt array_end(array_start, array_size_expr);
509 linker_values.emplace(
510 d[
"end-symbol"].value, std::make_pair(end_sym, array_end));
512 auto entry = std::find_if(
515 [&d](
const jsont &add) {
516 return add[
"sym"].
value == d[
"end-symbol"].value;
520 log.debug() <<
"Could not find address corresponding to symbol '"
521 << d[
"end-symbol"].value <<
"' (end of section)"
527 std::ostringstream end_comment;
528 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
529 <<
"'. Original address in object file"
530 <<
" is " << (*entry)[
"val"].value;
538 initialize_instructions.push_front(end_assign_i);
549 array_sym.
type=array_type;
551 symbol_table.
add(array_sym);
562 initialize_instructions.push_front(array_assign_i);
572 auto it=linker_values.find(
irep_idt(d[
"sym"].value));
573 if(it!=linker_values.end())
580 const auto &pair=truncated_symbols.find(d[
"sym"].value);
581 if(pair==truncated_symbols.end())
582 symbol_value=d[
"val"].value;
586 <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from "
588 <<
" because it corresponds to the size of a too-large section."
595 loc.
set_comment(
"linker script-defined symbol: char *"+
596 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
608 linker_values.emplace(
609 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
615 initialize_instructions.push_front(assign_i);
634 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
636 f.add_source_location()=loc;
639 i.make_function_call(f);
640 initialize_instructions.push_front(i);
651 symbol_table.
add(sym);
658 loc.
set_comment(
"linker script-defined symbol: char *"+
659 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
671 linker_values.emplace(
672 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
675 assign.add_source_location()=loc;
677 assign_i.make_assignment(assign);
678 initialize_instructions.push_front(assign_i);
685 std::list<irep_idt> &linker_defined_symbols,
687 const std::string &out_file,
688 const std::string &def_out_file)
690 for(
auto const &pair : symbol_table.
symbols)
693 pair.second.is_extern && pair.second.value.is_nil() &&
696 linker_defined_symbols.push_back(pair.second.name);
700 std::ostringstream linker_def_str;
702 linker_defined_symbols.begin(),
703 linker_defined_symbols.end(),
704 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
705 log.
debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n"
709 std::ofstream linker_def_file(linker_def_infile());
710 linker_def_file << linker_def_str.str();
711 linker_def_file.close();
713 std::vector<std::string> argv=
717 "--object", out_file,
718 "--sym-file", linker_def_infile(),
719 "--out-file", def_out_file
723 argv.push_back(
"--very-verbose");
725 argv.push_back(
"--verbose");
728 for(std::size_t i=0; i<argv.size(); i++)
732 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
740 const std::list<irep_idt> &linker_defined_symbols,
744 for(
const auto &sym : linker_defined_symbols)
745 if(linker_values.find(sym)==linker_values.end())
749 <<
"' was declared extern but never given "
753 for(
const auto &pair : linker_values)
755 auto it=std::find(linker_defined_symbols.begin(),
756 linker_defined_symbols.end(), pair.first);
757 if(it==linker_defined_symbols.end())
761 <<
"Linker script-defined symbol '" << pair.first <<
"' was "
762 <<
"either defined in the C source code, not declared extern in "
763 <<
"the C source code, or does not appear in the C source code"
772 if(!
data.is_object())
777 !(data_object.
find(
"regions") != data_object.
end() &&
778 data_object.
find(
"addresses") != data_object.
end() &&
779 data[
"regions"].is_array() &&
data[
"addresses"].is_array() &&
788 return address.
find(
"val") != address.end() &&
789 address.find(
"sym") != address.end() &&
790 address[
"val"].is_number() && address[
"sym"].is_string();
800 return region.
find(
"start") != region.end() &&
801 region.find(
"size") != region.end() &&
802 region.find(
"annot") != region.end() &&
803 region.find(
"commt") != region.end() &&
804 region.find(
"start-symbol") != region.end() &&
805 region.find(
"has-end-symbol") != region.end() &&
806 region.find(
"section") != region.end() &&
807 region[
"start"].is_number() && region[
"size"].is_number() &&
808 region[
"annot"].is_string() &&
809 region[
"start-symbol"].is_string() &&
810 region[
"section"].is_string() && region[
"commt"].is_string() &&
811 ((region[
"has-end-symbol"].is_true() &&
812 region.find(
"end-symbol") != region.end() &&
813 region[
"end-symbol"].is_string()) ||
814 (region[
"has-end-symbol"].is_false() &&
815 region.find(
"size-symbol") != region.end() &&
816 region.find(
"end-symbol") == region.end() &&
817 region[
"size-symbol"].is_string()));