cprover
ci_lazy_methods.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Java Bytecode
4 
5 Author: Diffblue Ltd.
6 
7 \*******************************************************************/
8 
9 #include "ci_lazy_methods.h"
10 #include "java_bytecode_language.h"
11 #include "java_class_loader.h"
12 #include "java_entry_point.h"
13 #include "remove_exceptions.h"
14 
15 #include <util/expr_iterator.h>
16 #include <util/namespace.h>
17 #include <util/suffix.h>
18 
20 
37  const symbol_tablet &symbol_table,
38  const irep_idt &main_class,
39  const std::vector<irep_idt> &main_jar_classes,
40  const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
41  java_class_loadert &java_class_loader,
42  const std::vector<irep_idt> &extra_instantiated_classes,
43  const select_pointer_typet &pointer_type_selector,
44  const synthetic_methods_mapt &synthetic_methods)
45  : main_class(main_class),
46  main_jar_classes(main_jar_classes),
47  lazy_methods_extra_entry_points(lazy_methods_extra_entry_points),
48  java_class_loader(java_class_loader),
49  extra_instantiated_classes(extra_instantiated_classes),
50  pointer_type_selector(pointer_type_selector),
51  synthetic_methods(synthetic_methods)
52 {
53  // build the class hierarchy
54  class_hierarchy(symbol_table);
55 }
56 
63 static bool references_class_model(const exprt &expr)
64 {
65  static const struct_tag_typet class_type("java::java.lang.Class");
66 
67  for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
68  {
70  it->type() == class_type &&
71  has_suffix(
74  {
75  return true;
76  }
77  }
78 
79  return false;
80 }
81 
99  symbol_tablet &symbol_table,
100  method_bytecodet &method_bytecode,
101  const method_convertert &method_converter,
102  message_handlert &message_handler)
103 {
104  std::unordered_set<irep_idt> methods_to_convert_later =
105  entry_point_methods(symbol_table, message_handler);
106 
107  // Add any extra entry points specified; we should elaborate these in the
108  // same way as the main function.
109  for(const auto &extra_function_generator : lazy_methods_extra_entry_points)
110  {
111  std::vector<irep_idt> extra_methods =
112  extra_function_generator(symbol_table);
113  methods_to_convert_later.insert(extra_methods.begin(), extra_methods.end());
114  }
115 
116  std::unordered_set<irep_idt> instantiated_classes;
117 
118  {
119  std::unordered_set<irep_idt> initial_callable_methods;
120  ci_lazy_methods_neededt initial_lazy_methods(
121  initial_callable_methods,
122  instantiated_classes,
123  symbol_table,
126  methods_to_convert_later, namespacet(symbol_table), initial_lazy_methods);
127  methods_to_convert_later.insert(
128  initial_callable_methods.begin(), initial_callable_methods.end());
129  }
130 
131  std::unordered_set<irep_idt> methods_already_populated;
132  std::unordered_set<class_method_descriptor_exprt, irep_hash>
133  called_virtual_functions;
134  bool class_initializer_seen = false;
135 
136  messaget log{message_handler};
137 
138  bool any_new_classes = true;
139  while(any_new_classes)
140  {
141  bool any_new_methods = true;
142  while(any_new_methods)
143  {
144  any_new_methods = false;
145  while(!methods_to_convert_later.empty())
146  {
147  std::unordered_set<irep_idt> methods_to_convert;
148  std::swap(methods_to_convert, methods_to_convert_later);
149  for(const auto &mname : methods_to_convert)
150  {
151  const auto conversion_result = convert_and_analyze_method(
152  method_converter,
153  methods_already_populated,
154  class_initializer_seen,
155  mname,
156  symbol_table,
157  methods_to_convert_later,
158  instantiated_classes,
159  called_virtual_functions,
160  message_handler);
161  any_new_methods |= conversion_result.new_method_seen;
162  class_initializer_seen |= conversion_result.class_initializer_seen;
163  }
164  }
165 
166  // Given the object types we now know may be created, populate more
167  // possible virtual function call targets:
168 
169  log.debug() << "CI lazy methods: add virtual method targets ("
170  << called_virtual_functions.size() << " callsites)"
171  << messaget::eom;
172 
173  for(const class_method_descriptor_exprt &called_virtual_function :
174  called_virtual_functions)
175  {
177  called_virtual_function,
178  instantiated_classes,
179  methods_to_convert_later,
180  symbol_table);
181  }
182  }
183 
184  any_new_classes = handle_virtual_methods_with_no_callees(
185  methods_to_convert_later,
186  instantiated_classes,
187  called_virtual_functions,
188  symbol_table);
189  }
190 
191  // Remove symbols for methods that were declared but never used:
192  symbol_tablet keep_symbols;
193  // Manually keep @inflight_exception, as it is unused at this stage
194  // but will become used when the `remove_exceptions` pass is run:
195  keep_symbols.add(symbol_table.lookup_ref(INFLIGHT_EXCEPTION_VARIABLE_NAME));
196 
197  for(const auto &sym : symbol_table.symbols)
198  {
199  // Don't keep global variables (unless they're gathered below from a
200  // function that references them)
201  if(sym.second.is_static_lifetime)
202  continue;
203  if(sym.second.type.id()==ID_code)
204  {
205  // Don't keep functions that belong to this language that we haven't
206  // converted above
207  if(
208  (method_bytecode.contains_method(sym.first) ||
209  synthetic_methods.count(sym.first)) &&
210  !methods_already_populated.count(sym.first))
211  {
212  continue;
213  }
214  // If this is a function then add all the things used in it
215  gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
216  }
217  keep_symbols.add(sym.second);
218  }
219 
220  log.debug() << "CI lazy methods: removed "
221  << symbol_table.symbols.size() - keep_symbols.symbols.size()
222  << " unreachable methods and globals" << messaget::eom;
223 
224  symbol_table.swap(keep_symbols);
225 
226  return false;
227 }
228 
237  std::unordered_set<irep_idt> &methods_to_convert_later,
238  std::unordered_set<irep_idt> &instantiated_classes,
239  const std::unordered_set<class_method_descriptor_exprt, irep_hash>
240  &virtual_functions,
241  symbol_tablet &symbol_table)
242 {
243  ci_lazy_methods_neededt lazy_methods_loader(
244  methods_to_convert_later,
245  instantiated_classes,
246  symbol_table,
248 
249  bool any_new_classes = false;
250  for(const class_method_descriptor_exprt &virtual_function : virtual_functions)
251  {
252  std::unordered_set<irep_idt> candidate_target_methods;
254  virtual_function,
255  instantiated_classes,
256  candidate_target_methods,
257  symbol_table);
258 
259  if(!candidate_target_methods.empty())
260  continue;
261 
262  const java_method_typet &java_method_type =
263  to_java_method_type(virtual_function.type());
264 
265  // Add the call class to instantiated_classes and assert that it
266  // didn't already exist. It can't be instantiated already, otherwise it
267  // would give a concrete definition of the called method, and
268  // candidate_target_methods would be non-empty.
269  const irep_idt &call_class = virtual_function.class_id();
270  bool was_missing = instantiated_classes.count(call_class) == 0;
271  CHECK_RETURN(was_missing);
272  any_new_classes = true;
273 
274  const typet &this_type = java_method_type.get_this()->type();
275  if(
276  const pointer_typet *this_pointer_type =
277  type_try_dynamic_cast<pointer_typet>(this_type))
278  {
279  lazy_methods_loader.add_all_needed_classes(*this_pointer_type);
280  }
281 
282  // That should in particular have added call_class to the possibly
283  // instantiated types.
284  bool still_missing = instantiated_classes.count(call_class) == 0;
285  CHECK_RETURN(!still_missing);
286 
287  // Make sure we add our return type as required, as we may not have
288  // seen any concrete instances of it being created.
289  const typet &return_type = java_method_type.return_type();
290  if(
291  const pointer_typet *return_pointer_type =
292  type_try_dynamic_cast<pointer_typet>(return_type))
293  {
294  lazy_methods_loader.add_all_needed_classes(*return_pointer_type);
295  }
296 
297  // Check that `get_virtual_method_target` returns a method now
298  const irep_idt &method_name = virtual_function.mangled_method_name();
299  const irep_idt method_id = get_virtual_method_target(
300  instantiated_classes, method_name, call_class, symbol_table);
301  CHECK_RETURN(!method_id.empty());
302 
303  // Add what it returns to methods_to_convert_later
304  methods_to_convert_later.insert(method_id);
305  }
306  return any_new_classes;
307 }
308 
320  const method_convertert &method_converter,
321  std::unordered_set<irep_idt> &methods_already_populated,
322  const bool class_initializer_already_seen,
323  const irep_idt &method_name,
324  symbol_tablet &symbol_table,
325  std::unordered_set<irep_idt> &methods_to_convert_later,
326  std::unordered_set<irep_idt> &instantiated_classes,
327  std::unordered_set<class_method_descriptor_exprt, irep_hash>
328  &called_virtual_functions,
329  message_handlert &message_handler)
330 {
331  convert_method_resultt result;
332  if(!methods_already_populated.insert(method_name).second)
333  return result;
334 
335  messaget log{message_handler};
336  log.debug() << "CI lazy methods: elaborate " << method_name << messaget::eom;
337 
338  // Note this wraps *references* to methods_to_convert_later &
339  // instantiated_classes
340  ci_lazy_methods_neededt needed_methods(
341  methods_to_convert_later,
342  instantiated_classes,
343  symbol_table,
345 
346  if(method_converter(method_name, needed_methods))
347  return result;
348 
349  const exprt &method_body = symbol_table.lookup_ref(method_name).value;
350  gather_virtual_callsites(method_body, called_virtual_functions);
351 
352  if(!class_initializer_already_seen && references_class_model(method_body))
353  {
354  result.class_initializer_seen = true;
355  const irep_idt initializer_signature =
357  if(symbol_table.has_symbol(initializer_signature))
358  methods_to_convert_later.insert(initializer_signature);
359  }
360  result.new_method_seen = true;
361  return result;
362 }
363 
369 std::unordered_set<irep_idt> ci_lazy_methodst::entry_point_methods(
370  const symbol_tablet &symbol_table,
371  message_handlert &message_handler)
372 {
373  std::unordered_set<irep_idt> methods_to_convert_later;
374 
375  const main_function_resultt main_function =
376  get_main_symbol(symbol_table, this->main_class, message_handler);
377  if(!main_function.is_success())
378  {
379  // Failed, mark all functions in the given main class(es)
380  // reachable.
381  std::vector<irep_idt> reachable_classes;
382  if(!this->main_class.empty())
383  reachable_classes.push_back(this->main_class);
384  else
385  reachable_classes = this->main_jar_classes;
386  for(const irep_idt &class_name : reachable_classes)
387  {
388  const auto &methods =
389  this->java_class_loader.get_original_class(class_name)
391  for(const auto &method : methods)
392  {
393  const irep_idt methodid = "java::" + id2string(class_name) + "." +
394  id2string(method.name) + ":" +
395  id2string(method.descriptor);
396  methods_to_convert_later.insert(methodid);
397  }
398  }
399  }
400  else
401  methods_to_convert_later.insert(main_function.main_function.name);
402  return methods_to_convert_later;
403 }
404 
414  const std::unordered_set<irep_idt> &entry_points,
415  const namespacet &ns,
416  ci_lazy_methods_neededt &needed_lazy_methods)
417 {
418  for(const auto &mname : entry_points)
419  {
420  const auto &symbol=ns.lookup(mname);
421  const auto &mtype = to_java_method_type(symbol.type);
422  for(const auto &param : mtype.parameters())
423  {
424  if(param.type().id()==ID_pointer)
425  {
426  const pointer_typet &original_pointer = to_pointer_type(param.type());
427  needed_lazy_methods.add_all_needed_classes(original_pointer);
428  }
429  }
430  }
431 
432  // Also add classes whose instances are magically
433  // created by the JVM and so won't be spotted by
434  // looking for constructors and calls as usual:
435  needed_lazy_methods.add_needed_class("java::java.lang.String");
436  needed_lazy_methods.add_needed_class("java::java.lang.Class");
437  needed_lazy_methods.add_needed_class("java::java.lang.Object");
438 
439  // As in class_loader, ensure these classes stay available
440  for(const auto &id : extra_instantiated_classes)
441  needed_lazy_methods.add_needed_class("java::" + id2string(id));
442 }
443 
449  const exprt &e,
450  std::unordered_set<class_method_descriptor_exprt, irep_hash> &result)
451 {
452  if(e.id()!=ID_code)
453  return;
454  const codet &c=to_code(e);
455  if(
456  c.get_statement() == ID_function_call &&
458  to_code_function_call(c).function()))
459  {
460  result.insert(
462  }
463  else
464  {
465  for(const exprt &op : e.operands())
466  gather_virtual_callsites(op, result);
467  }
468 }
469 
481  const class_method_descriptor_exprt &called_function,
482  const std::unordered_set<irep_idt> &instantiated_classes,
483  std::unordered_set<irep_idt> &callable_methods,
484  symbol_tablet &symbol_table)
485 {
486  const auto &call_class = called_function.class_id();
487  const auto &method_name = called_function.mangled_method_name();
488 
489  class_hierarchyt::idst self_and_child_classes =
491  self_and_child_classes.push_back(call_class);
492 
493  for(const irep_idt &class_name : self_and_child_classes)
494  {
495  const irep_idt method_id = get_virtual_method_target(
496  instantiated_classes, method_name, class_name, symbol_table);
497  if(!method_id.empty())
498  callable_methods.insert(method_id);
499  }
500 }
501 
508  const exprt &e,
509  const symbol_tablet &symbol_table,
510  symbol_tablet &needed)
511 {
512  if(e.id()==ID_symbol)
513  {
514  // If the symbol isn't in the symbol table at all, then it is defined
515  // on an opaque type (i.e. we don't have the class definition at this point)
516  // and will be created during the typecheck phase.
517  // We don't mark it as 'needed' as it doesn't exist yet to keep.
518  const auto findit=
519  symbol_table.symbols.find(to_symbol_expr(e).get_identifier());
520  if(findit!=symbol_table.symbols.end() &&
521  findit->second.is_static_lifetime)
522  {
523  needed.add(findit->second);
524  // Gather any globals referenced in the initialiser:
525  gather_needed_globals(findit->second.value, symbol_table, needed);
526  }
527  }
528  else
529  forall_operands(opit, e)
530  gather_needed_globals(*opit, symbol_table, needed);
531 }
532 
546  const std::unordered_set<irep_idt> &instantiated_classes,
547  const irep_idt &call_basename,
548  const irep_idt &classname,
549  const symbol_tablet &symbol_table)
550 {
551  // Program-wide, is this class ever instantiated?
552  if(!instantiated_classes.count(classname))
553  return irep_idt();
554 
555  auto resolved_call =
556  get_inherited_method_implementation(call_basename, classname, symbol_table);
557 
558  if(resolved_call)
559  return resolved_call->get_full_component_identifier();
560  else
561  return irep_idt();
562 }
static bool references_class_model(const exprt &expr)
Checks if an expression refers to any class literals (e.g. `MyType.class`).
Collect methods needed to be loaded using the lazy method.
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
void add_all_needed_classes(const pointer_typet &pointer_type)
Add to the needed classes all classes specified, the replacement type if it will be replaced,...
bool add_needed_class(const irep_idt &)
Notes that class `class_symbol_name` will be instantiated, or that a static field belonging to it will be accessed.
const std::vector< irep_idt > & extra_instantiated_classes
void gather_needed_globals(const exprt &e, const symbol_tablet &symbol_table, symbol_tablet &needed)
See output.
std::vector< irep_idt > main_jar_classes
ci_lazy_methodst(const symbol_tablet &symbol_table, const irep_idt &main_class, const std::vector< irep_idt > &main_jar_classes, const std::vector< load_extra_methodst > &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector< irep_idt > &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, const synthetic_methods_mapt &synthetic_methods)
Constructor for lazy-method loading.
const select_pointer_typet & pointer_type_selector
const std::vector< load_extra_methodst > & lazy_methods_extra_entry_points
class_hierarchyt class_hierarchy
void gather_virtual_callsites(const exprt &e, std::unordered_set< class_method_descriptor_exprt, irep_hash > &result)
Get places where virtual functions are called.
java_class_loadert & java_class_loader
void get_virtual_method_targets(const class_method_descriptor_exprt &called_function, const std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< irep_idt > &callable_methods, symbol_tablet &symbol_table)
Find possible callees, excluding types that are not known to be instantiated.
const synthetic_methods_mapt & synthetic_methods
std::unordered_set< irep_idt > entry_point_methods(const symbol_tablet &symbol_table, message_handlert &message_handler)
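Entry point methods are either: the main function, when one can be found; or, failing that, every method of the main class (or of every class in the main JAR when no main class is given).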
convert_method_resultt convert_and_analyze_method(const method_convertert &method_converter, std::unordered_set< irep_idt > &methods_already_populated, const bool class_initializer_already_seen, const irep_idt &method_name, symbol_tablet &symbol_table, std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< class_method_descriptor_exprt, irep_hash > &called_virtual_functions, message_handlert &message_handler)
Convert a method, add it to the populated set, add needed methods to methods_to_convert_later and add the virtual callsites found in its body to called_virtual_functions.
irep_idt get_virtual_method_target(const std::unordered_set< irep_idt > &instantiated_classes, const irep_idt &call_basename, const irep_idt &classname, const symbol_tablet &symbol_table)
Find a virtual callee, if one is defined and the callee type is known to exist.
void initialize_instantiated_classes(const std::unordered_set< irep_idt > &entry_points, const namespacet &ns, ci_lazy_methods_neededt &needed_lazy_methods)
Build up a list of methods whose type may be passed around reachable from the entry point.
bool handle_virtual_methods_with_no_callees(std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, const std::unordered_set< class_method_descriptor_exprt, irep_hash > &virtual_functions, symbol_tablet &symbol_table)
Look for virtual callsites with no candidate targets.
bool operator()(symbol_tablet &symbol_table, method_bytecodet &method_bytecode, const method_convertert &method_converter, message_handlert &message_handler)
Uses a simple context-insensitive ('ci') analysis to determine which methods may be reachable from the entry point(s).
idst get_children_trans(const irep_idt &id) const
std::vector< irep_idt > idst
An expression describing a method on a class.
Definition: std_expr.h:3272
const irep_idt & class_id() const
Unique identifier in the symbol table, of the compile time type of the class which this expression is...
Definition: std_expr.h:3317
const irep_idt & mangled_method_name() const
The method name after mangling it by combining it with the descriptor.
Definition: std_expr.h:3309
const typet & return_type() const
Definition: std_types.h:645
const parametert * get_this() const
Definition: std_types.h:621
Data structure for representing an arbitrary statement in a program.
Definition: std_code_base.h:29
const irep_idt & get_statement() const
Definition: std_code_base.h:65
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
bool empty() const
Definition: dstring.h:88
Base class for all expressions.
Definition: expr.h:54
depth_iteratort depth_end()
Definition: expr.cpp:267
depth_iteratort depth_begin()
Definition: expr.cpp:265
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:92
const irep_idt & id() const
Definition: irep.h:396
Class responsible to load .class files.
const java_bytecode_parse_treet & get_original_class(const irep_idt &class_name)
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
static eomt eom
Definition: message.h:297
bool contains_method(const irep_idt &method_id) const
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:138
The pointer type These are both 'bitvector_typet' (they have a width) and 'type_with_subtypet' (they ...
Definition: pointer_expr.h:24
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:449
const irep_idt & get_identifier() const
Definition: std_expr.h:109
const symbolst & symbols
Read-only field, used to look up symbols given their names.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
The symbol table.
Definition: symbol_table.h:14
void swap(symbol_tablet &other)
Swap symbol maps between two symbol tables.
Definition: symbol_table.h:74
irep_idt name
The unique identifier.
Definition: symbol.h:40
exprt value
Initial value of symbol.
Definition: symbol.h:34
The type of an expression, extends irept.
Definition: type.h:29
#define forall_operands(it, expr)
Definition: expr.h:18
Forward depth-first search iterators These iterators' copy operations are expensive,...
const code_function_callt & to_code_function_call(const codet &code)
dstringt irep_idt
Definition: irep.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
#define JAVA_CLASS_MODEL_SUFFIX
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
irep_idt get_java_class_literal_initializer_signature()
Get the symbol name of java.lang.Class' initializer method.
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:184
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:79
Remove function exceptional returns.
#define INFLIGHT_EXCEPTION_VARIABLE_NAME
optionalt< resolve_inherited_componentt::inherited_componentt > get_inherited_method_implementation(const irep_idt &call_basename, const irep_idt &classname, const symbol_tablet &symbol_table)
Given a class and a component, identify the concrete method it is resolved to.
Given a class and a component (either field or method), find the closest parent that defines that com...
static optionalt< smt_termt > get_identifier(const exprt &expr, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_handle_identifiers, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_identifiers)
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
const codet & to_code(const exprt &expr)
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:189
bool can_cast_expr< class_method_descriptor_exprt >(const exprt &base)
Definition: std_expr.h:3372
bool can_cast_expr< symbol_exprt >(const exprt &base)
Definition: std_expr.h:173
const class_method_descriptor_exprt & to_class_method_descriptor_expr(const exprt &expr)
Cast an exprt to a class_method_descriptor_exprt.
Definition: std_expr.h:3362
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:17
std::unordered_map< irep_idt, synthetic_method_typet > synthetic_methods_mapt
Maps method names on to a synthetic method kind.