38 auto original_goto_model =
41 if(!original_goto_model.has_value())
43 log.
error() <<
"Unable to read goto binary for linker script merging"
49 std::list<irep_idt> linker_defined_symbols;
51 linker_defined_symbols,
52 original_goto_model->symbol_table,
54 linker_def_outfile());
79 if(pair == original_goto_model->goto_functions.function_map.end())
89 original_goto_model->symbol_table,
109 log.
error() <<
"Could not pointerize all linker-defined expressions"
116 *original_goto_model,
122 log.
error() <<
"Could not write linkerscript-augmented binary"
130 const std::string &elf_binary,
131 const std::string &goto_binary,
134 : elf_binary(elf_binary),
135 goto_binary(goto_binary),
137 log(message_handler),
138 replacement_predicates(
140 "address of array's first member",
145 [](
const exprt &expr) {
146 return expr.
id() == ID_address_of &&
147 expr.
type().
id() == ID_pointer &&
155 .
id() == ID_symbol &&
163 .
id() == ID_constant &&
167 .
id() == ID_signedbv;
174 [](
const exprt &expr) {
175 return expr.
id() == ID_address_of &&
176 expr.
type().
id() == ID_pointer &&
186 [](
const exprt &expr) {
187 return expr.
id() == ID_address_of &&
188 expr.
type().
id() == ID_pointer &&
200 [](
const exprt &expr) {
201 return expr.
id() == ID_symbol && expr.
type().
id() == ID_array;
204 "pointer (does not need pointerizing)",
208 [](
const exprt &expr) {
209 return expr.
id() == ID_symbol && expr.
type().
id() == ID_pointer;
221 for(
const auto &pair : linker_values)
229 entry.
value=pair.second.second;
236 std::list<symbol_exprt> to_pointerize;
239 if(to_pointerize.empty())
241 log.
debug() <<
"Pointerizing the symbol-table value of symbol "
247 if(to_pointerize.empty() && fail==0)
250 for(
const auto &sym : to_pointerize)
252 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
253 <<
"' in symbol table entry " << pair.first <<
". Pretty:\n"
254 << sym.pretty() <<
"\n";
266 for(
exprt *insts : std::list<exprt *>(
267 {&(instruction.code_nonconst()), &(instruction.guard)}))
269 std::list<symbol_exprt> to_pointerize;
271 if(to_pointerize.empty())
275 if(to_pointerize.empty() && fail==0)
278 for(
const auto &sym : to_pointerize)
280 log.
error() <<
" Could not pointerize '" << sym.get_identifier()
281 <<
"' in function " << gf.first <<
". Pretty:\n"
282 << sym.pretty() <<
"\n";
297 const std::string &shape)
299 auto it=linker_values.find(ident);
300 if(it==linker_values.end())
302 log.
error() <<
"Could not find a new expression for linker script-defined "
308 log.
debug() <<
"Pointerizing linker-defined symbol '" << ident
316 std::list<symbol_exprt> &to_pointerize,
320 for(
auto const &pair : linker_values)
323 if(!pattern.match(expr))
326 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
329 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
330 pattern.description());
332 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
334 if(result==to_pointerize.end())
341 to_pointerize.erase(result);
360 std::list<symbol_exprt> &to_pointerize)
const
362 for(
const auto &op : expr.
operands())
364 if(op.id()!=ID_symbol)
368 if(pair!=linker_values.end())
369 to_pointerize.push_back(sym_exp);
371 for(
const auto &op : expr.
operands())
376 The current implementation of
this function is less precise than the
377 commented-out version below. To understand the difference between these
378 implementations, consider the following example:
380 Suppose we have a section in the linker script, 100 bytes long, where the
381 address of the symbol sec_start is the start of the section (value 4096) and the
382 address of sec_end is the end of that section (value 4196).
384 The current implementation synthesizes the
goto-version of the following C:
386 char __sec_array[100];
387 char *sec_start=(&__sec_array[0]);
388 char *sec_end=((&__sec_array[0])+100);
392 This is imprecise
for the following reason: the actual address of the array and
393 the pointers shall be some random CBMC-
internal address, instead of being 4096
394 and 4196. The linker script, on the other hand, would have specified the exact
395 position of the section, and we even know what the actual values of sec_start
396 and sec_end are from the
object file (these values are in the `addresses` list
397 of the `
data` argument to
this function). If the correctness of the code depends
398 on these actual values, then CBMCs model of the code is too imprecise to verify
401 The commented-out version of
this function below synthesizes the following:
403 char *sec_start=4096;
407 This code has both the actual addresses of the start and end of the section and
408 tells CBMC that the intermediate region is valid. However, the allocated_memory
409 macro does not currently allocate an actual
object at the address 4096, so
410 symbolic execution can fail. In particular, the
'allocated memory' is part of
411 __CPROVER_memory, which does not have a bounded size;
this means that (
for
412 example) calls to memcpy or memset fail, because the first and third arguments
413 do not have know n size. The commented-out implementation should be reinstated
416 In either
case, no other changes to the rest of the code (outside
this function)
417 should be necessary. The rest of
this file converts expressions containing the
418 linker-defined symbol into pointer types
if they were not already, and
this is
419 the right behaviour
for both implementations.
423 const std::string &linker_script,
430 std::map<irep_idt, std::size_t> truncated_symbols;
433 bool has_end=d[
"has-end-symbol"].is_true();
437 << d[
"start-symbol"].value;
446 log.warning() <<
"Object section '" << d[
"section"].value <<
"' of size "
447 << array_size <<
" is too large to model. Truncating to "
460 std::ostringstream array_comment;
461 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size "
462 << array_size <<
" bytes";
472 linker_values.emplace(
473 d[
"start-symbol"].value, std::make_pair(start_sym, array_start));
477 auto it = std::find_if(
480 [&d](
const jsont &add) {
481 return add[
"sym"].
value == d[
"start-symbol"].value;
485 log.error() <<
"Start: Could not find address corresponding to symbol '"
486 << d[
"start-symbol"].value <<
"' (start of section)"
492 std::ostringstream start_comment;
493 start_comment <<
"Pointer to beginning of object section '"
494 << d[
"section"].value <<
"'. Original address in object file"
495 <<
" is " << (*it)[
"val"].value;
504 initialize_instructions.push_front(start_assign_i);
508 plus_exprt array_end(array_start, array_size_expr);
511 linker_values.emplace(
512 d[
"end-symbol"].value, std::make_pair(end_sym, array_end));
514 auto entry = std::find_if(
517 [&d](
const jsont &add) {
518 return add[
"sym"].
value == d[
"end-symbol"].value;
522 log.debug() <<
"Could not find address corresponding to symbol '"
523 << d[
"end-symbol"].value <<
"' (end of section)"
529 std::ostringstream end_comment;
530 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
531 <<
"'. Original address in object file"
532 <<
" is " << (*entry)[
"val"].value;
540 initialize_instructions.push_front(end_assign_i);
551 array_sym.
type=array_type;
553 symbol_table.
add(array_sym);
564 initialize_instructions.push_front(array_assign_i);
574 auto it=linker_values.find(
irep_idt(d[
"sym"].value));
575 if(it!=linker_values.end())
582 const auto &pair=truncated_symbols.find(d[
"sym"].value);
583 if(pair==truncated_symbols.end())
584 symbol_value=d[
"val"].value;
588 <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from "
590 <<
" because it corresponds to the size of a too-large section."
597 loc.
set_comment(
"linker script-defined symbol: char *"+
598 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
610 linker_values.emplace(
611 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
617 initialize_instructions.push_front(assign_i);
636 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
638 f.add_source_location()=loc;
641 i.make_function_call(f);
642 initialize_instructions.push_front(i);
653 symbol_table.
add(sym);
660 loc.
set_comment(
"linker script-defined symbol: char *"+
661 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
673 linker_values.emplace(
674 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
677 assign.add_source_location()=loc;
679 assign_i.make_assignment(assign);
680 initialize_instructions.push_front(assign_i);
687 std::list<irep_idt> &linker_defined_symbols,
689 const std::string &out_file,
690 const std::string &def_out_file)
692 for(
auto const &pair : symbol_table.
symbols)
695 pair.second.is_extern && pair.second.value.is_nil() &&
698 linker_defined_symbols.push_back(pair.second.name);
702 std::ostringstream linker_def_str;
704 linker_defined_symbols.begin(),
705 linker_defined_symbols.end(),
706 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
707 log.
debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n"
711 std::ofstream linker_def_file(linker_def_infile());
712 linker_def_file << linker_def_str.str();
713 linker_def_file.close();
715 std::vector<std::string> argv=
719 "--object", out_file,
720 "--sym-file", linker_def_infile(),
721 "--out-file", def_out_file
725 argv.push_back(
"--very-verbose");
727 argv.push_back(
"--verbose");
730 for(std::size_t i=0; i<argv.size(); i++)
734 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
742 const std::list<irep_idt> &linker_defined_symbols,
746 for(
const auto &sym : linker_defined_symbols)
747 if(linker_values.find(sym)==linker_values.end())
751 <<
"' was declared extern but never given "
755 for(
const auto &pair : linker_values)
757 auto it=std::find(linker_defined_symbols.begin(),
758 linker_defined_symbols.end(), pair.first);
759 if(it==linker_defined_symbols.end())
763 <<
"Linker script-defined symbol '" << pair.first <<
"' was "
764 <<
"either defined in the C source code, not declared extern in "
765 <<
"the C source code, or does not appear in the C source code"
774 if(!
data.is_object())
779 !(data_object.
find(
"regions") != data_object.
end() &&
780 data_object.
find(
"addresses") != data_object.
end() &&
781 data[
"regions"].is_array() &&
data[
"addresses"].is_array() &&
790 return address.
find(
"val") != address.end() &&
791 address.find(
"sym") != address.end() &&
792 address[
"val"].is_number() && address[
"sym"].is_string();
802 return region.
find(
"start") != region.end() &&
803 region.find(
"size") != region.end() &&
804 region.find(
"annot") != region.end() &&
805 region.find(
"commt") != region.end() &&
806 region.find(
"start-symbol") != region.end() &&
807 region.find(
"has-end-symbol") != region.end() &&
808 region.find(
"section") != region.end() &&
809 region[
"start"].is_number() && region[
"size"].is_number() &&
810 region[
"annot"].is_string() &&
811 region[
"start-symbol"].is_string() &&
812 region[
"section"].is_string() && region[
"commt"].is_string() &&
813 ((region[
"has-end-symbol"].is_true() &&
814 region.find(
"end-symbol") != region.end() &&
815 region[
"end-symbol"].is_string()) ||
816 (region[
"has-end-symbol"].is_false() &&
817 region.find(
"size-symbol") != region.end() &&
818 region.find(
"end-symbol") == region.end() &&
819 region[
"size-symbol"].is_string()));