cprover
remove_instanceof.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Remove Instance-of Operators
4 
5 Author: Chris Smowton, chris.smowton@diffblue.com
6 
7 \*******************************************************************/
8 
11 
12 #include "remove_instanceof.h"
13 
17 
19 
20 #include <util/arith_tools.h>
21 
23 {
24 public:
33  {
34  }
35 
36  // Lower instanceof for a single function
37  bool lower_instanceof(const irep_idt &function_identifier, goto_programt &);
38 
39  // Lower instanceof for a single instruction
40  bool lower_instanceof(
41  const irep_idt &function_identifier,
42  goto_programt &,
44 
45 protected:
50 
51  bool lower_instanceof(
52  const irep_idt &function_identifier,
53  exprt &,
54  goto_programt &,
56 };
57 
68  const exprt &classid_field,
69  const irep_idt &target_type,
70  const class_hierarchyt &class_hierarchy)
71 {
72  std::vector<irep_idt> children =
73  class_hierarchy.get_children_trans(target_type);
74  children.push_back(target_type);
75  // Sort alphabetically to make order of generated disjuncts
76  // independent of class loading order
77  std::sort(
78  children.begin(), children.end(), [](const irep_idt &a, const irep_idt &b) {
79  return a.compare(b) < 0;
80  });
81 
82  exprt::operandst or_ops;
83  for(const auto &class_name : children)
84  {
85  constant_exprt class_expr(class_name, string_typet());
86  equal_exprt test(classid_field, class_expr);
87  or_ops.push_back(test);
88  }
89 
90  return disjunction(or_ops);
91 }
92 
102  const irep_idt &function_identifier,
103  exprt &expr,
104  goto_programt &goto_program,
105  goto_programt::targett this_inst)
106 {
107  if(expr.id()!=ID_java_instanceof)
108  {
109  bool changed = false;
110  Forall_operands(it, expr)
111  changed |=
112  lower_instanceof(function_identifier, *it, goto_program, this_inst);
113  return changed;
114  }
115 
116  INVARIANT(
117  expr.operands().size()==2,
118  "java_instanceof should have two operands");
119 
120  const exprt &check_ptr = to_binary_expr(expr).op0();
121  INVARIANT(
122  check_ptr.type().id()==ID_pointer,
123  "instanceof first operand should be a pointer");
124 
125  const exprt &target_arg = to_binary_expr(expr).op1();
126  INVARIANT(
127  target_arg.id()==ID_type,
128  "instanceof second operand should be a type");
129 
130  INVARIANT(
131  target_arg.type().id() == ID_struct_tag,
132  "instanceof second operand should have a simple type");
133 
134  const auto &target_type = to_struct_tag_type(target_arg.type());
135 
136  const auto underlying_type_and_dimension =
138 
139  bool target_type_is_reference_array =
140  underlying_type_and_dimension.second >= 1 &&
141  can_cast_type<java_reference_typet>(underlying_type_and_dimension.first);
142 
143  // If we're casting to a reference array type, check
144  // @class_identifier == "java::array[reference]" &&
145  // @array_dimension == target_array_dimension &&
146  // @array_element_type (subtype of) target_array_element_type
147  // Otherwise just check
148  // @class_identifier (subtype of) target_type
149 
150  // Exception: when the target type is Object, nothing needs checking; when
151  // it is Object[], Object[][] etc, then we check that
152  // @array_dimension >= target_array_dimension (because
153  // String[][] (subtype of) Object[] for example) and don't check the element
154  // type.
155 
156  // All tests are made directly against a pointer to the object whose type is
157  // queried. By operating directly on a pointer rather than using a temporary
158  // (x->@clsid == "A" rather than id = x->@clsid; id == "A") we enable symex's
159  // value-set filtering to discard no-longer-viable candidates for "x" (in the
160  // case where 'x' is a symbol, not a general expression like x->y->@clsid).
161  // This means there are more clean dereference operations (ones where there
162  // is no possibility of reading a null, invalid or incorrectly-typed object),
163  // and less pessimistic aliasing assumptions possibly causing goto-symex to
164  // explore in-fact-unreachable paths.
165 
166  // In all cases require the input pointer is not null for any cast to succeed:
167 
168  std::vector<exprt> test_conjuncts;
169  test_conjuncts.push_back(notequal_exprt(
170  check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type()))));
171 
172  auto jlo = to_struct_tag_type(java_lang_object_type().subtype());
173 
174  exprt object_class_identifier_field =
175  get_class_identifier_field(check_ptr, jlo, ns);
176 
177  if(target_type_is_reference_array)
178  {
179  const auto &underlying_type =
180  to_struct_tag_type(underlying_type_and_dimension.first.subtype());
181 
182  test_conjuncts.push_back(equal_exprt(
183  object_class_identifier_field,
185 
186  exprt object_array_dimension = get_array_dimension_field(check_ptr);
187  constant_exprt target_array_dimension = from_integer(
188  underlying_type_and_dimension.second, object_array_dimension.type());
189 
190  if(underlying_type == jlo)
191  {
192  test_conjuncts.push_back(binary_relation_exprt(
193  object_array_dimension, ID_ge, target_array_dimension));
194  }
195  else
196  {
197  test_conjuncts.push_back(
198  equal_exprt(object_array_dimension, target_array_dimension));
199  test_conjuncts.push_back(subtype_expr(
200  get_array_element_type_field(check_ptr),
201  underlying_type.get_identifier(),
202  class_hierarchy));
203  }
204  }
205  else if(target_type != jlo)
206  {
207  test_conjuncts.push_back(subtype_expr(
208  get_class_identifier_field(check_ptr, jlo, ns),
209  target_type.get_identifier(),
210  class_hierarchy));
211  }
212 
213  expr = conjunction(test_conjuncts);
214 
215  return true;
216 }
217 
218 static bool contains_instanceof(const exprt &e)
219 {
220  if(e.id() == ID_java_instanceof)
221  return true;
222 
223  for(const exprt &subexpr : e.operands())
224  {
225  if(contains_instanceof(subexpr))
226  return true;
227  }
228 
229  return false;
230 }
231 
240  const irep_idt &function_identifier,
241  goto_programt &goto_program,
242  goto_programt::targett target)
243 {
244  if(
245  target->is_target() && (contains_instanceof(target->get_code()) ||
246  contains_instanceof(target->guard)))
247  {
248  // If this is a branch target, add a skip beforehand so we can splice new
249  // GOTO programs before the target instruction without inserting into the
250  // wrong basic block.
251  goto_program.insert_before_swap(target);
252  target->turn_into_skip();
253  // Actually alter the now-moved instruction:
254  ++target;
255  }
256 
257  return lower_instanceof(
258  function_identifier, target->code_nonconst(), goto_program, target) |
260  function_identifier, target->guard, goto_program, target);
261 }
262 
270  const irep_idt &function_identifier,
271  goto_programt &goto_program)
272 {
273  bool changed=false;
274  for(goto_programt::instructionst::iterator target=
275  goto_program.instructions.begin();
276  target!=goto_program.instructions.end();
277  ++target)
278  {
279  changed =
280  lower_instanceof(function_identifier, goto_program, target) || changed;
281  }
282  if(!changed)
283  return false;
284  goto_program.update();
285  return true;
286 }
287 
298  const irep_idt &function_identifier,
299  goto_programt::targett target,
300  goto_programt &goto_program,
301  symbol_table_baset &symbol_table,
302  const class_hierarchyt &class_hierarchy,
303  message_handlert &message_handler)
304 {
305  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
306  rem.lower_instanceof(function_identifier, goto_program, target);
307 }
308 
318  const irep_idt &function_identifier,
320  symbol_table_baset &symbol_table,
321  const class_hierarchyt &class_hierarchy,
322  message_handlert &message_handler)
323 {
324  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
325  rem.lower_instanceof(function_identifier, function.body);
326 }
327 
336  goto_functionst &goto_functions,
337  symbol_table_baset &symbol_table,
338  const class_hierarchyt &class_hierarchy,
339  message_handlert &message_handler)
340 {
341  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
342  bool changed=false;
343  for(auto &f : goto_functions.function_map)
344  changed = rem.lower_instanceof(f.first, f.second.body) || changed;
345  if(changed)
346  goto_functions.compute_location_numbers();
347 }
348 
358  goto_modelt &goto_model,
359  const class_hierarchyt &class_hierarchy,
360  message_handlert &message_handler)
361 {
363  goto_model.goto_functions,
364  goto_model.symbol_table,
365  class_hierarchy,
366  message_handler);
367 }
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
can_cast_type< java_reference_typet >
bool can_cast_type< java_reference_typet >(const typet &type)
Definition: java_types.h:624
conjunction
exprt conjunction(const exprt::operandst &op)
1) generates a conjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:34
class_hierarchyt
Non-graph-based representation of the class hierarchy.
Definition: class_hierarchy.h:43
Forall_operands
#define Forall_operands(it, expr)
Definition: expr.h:25
arith_tools.h
get_array_dimension_field
exprt get_array_dimension_field(const exprt &pointer)
Definition: java_types.cpp:204
get_class_identifier_field
exprt get_class_identifier_field(const exprt &this_expr_in, const struct_tag_typet &suggested_type, const namespacet &ns)
Definition: class_identifier.cpp:57
goto_programt::update
void update()
Update all indices.
Definition: goto_program.cpp:576
remove_instanceoft::ns
namespacet ns
Definition: remove_instanceof.cpp:48
goto_model.h
Symbol Table + CFG.
goto_functionst::compute_location_numbers
void compute_location_numbers()
Definition: goto_functions.cpp:20
exprt
Base class for all expressions.
Definition: expr.h:54
goto_modelt
Definition: goto_model.h:26
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:29
equal_exprt
Equality.
Definition: std_expr.h:1225
remove_instanceoft::class_hierarchy
const class_hierarchyt & class_hierarchy
Definition: remove_instanceof.cpp:47
notequal_exprt
Disequality.
Definition: std_expr.h:1283
remove_instanceof
void remove_instanceof(const irep_idt &function_identifier, goto_programt::targett target, goto_programt &goto_program, symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, message_handlert &message_handler)
Replace an instanceof in the expression or guard of the passed instruction of the given function body...
Definition: remove_instanceof.cpp:297
to_binary_expr
const binary_exprt & to_binary_expr(const exprt &expr)
Cast an exprt to a binary_exprt.
Definition: std_expr.h:627
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:82
disjunction
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:22
null_pointer_exprt
The null pointer constant.
Definition: pointer_expr.h:703
symbol_table_baset
The symbol table base class interface.
Definition: symbol_table_base.h:22
to_pointer_type
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:62
irept::id
const irep_idt & id() const
Definition: irep.h:407
message_handlert
Definition: message.h:28
to_struct_tag_type
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:474
exprt::operandst
std::vector< exprt > operandst
Definition: expr.h:56
remove_instanceoft
Definition: remove_instanceof.cpp:23
java_lang_object_type
java_reference_typet java_lang_object_type()
Definition: java_types.cpp:98
remove_instanceoft::symbol_table
symbol_table_baset & symbol_table
Definition: remove_instanceof.cpp:46
remove_instanceof.h
Remove Instance-of Operators.
goto_functionst::goto_functiont
::goto_functiont goto_functiont
Definition: goto_functions.h:27
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:652
goto_functionst
A collection of goto functions.
Definition: goto_functions.h:25
class_hierarchy.h
Class Hierarchy.
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
remove_instanceoft::message_handler
message_handlert & message_handler
Definition: remove_instanceof.cpp:49
remove_instanceoft::remove_instanceoft
remove_instanceoft(symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, message_handlert &message_handler)
Definition: remove_instanceof.cpp:25
JAVA_REFERENCE_ARRAY_CLASSID
#define JAVA_REFERENCE_ARRAY_CLASSID
Definition: java_types.h:656
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
string_typet
String type.
Definition: std_types.h:880
binary_relation_exprt
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:674
contains_instanceof
static bool contains_instanceof(const exprt &e)
Definition: remove_instanceof.cpp:218
subtype_expr
static exprt subtype_expr(const exprt &classid_field, const irep_idt &target_type, const class_hierarchyt &class_hierarchy)
Produce an expression of the form classid_field == "A" || classid_field == "B" || ....
Definition: remove_instanceof.cpp:67
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:71
class_identifier.h
Extract class identifier.
java_array_dimension_and_element_type
std::pair< typet, std::size_t > java_array_dimension_and_element_type(const struct_tag_typet &type)
Returns the underlying element type and array dimensionality of Java struct type.
Definition: java_types.cpp:187
exprt::operands
operandst & operands()
Definition: expr.h:92
INVARIANT
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
java_types.h
goto_programt::insert_before_swap
void insert_before_swap(targett target)
Insertion that preserves jumps to "target".
Definition: goto_program.h:673
get_array_element_type_field
exprt get_array_element_type_field(const exprt &pointer)
Definition: java_types.cpp:216
constant_exprt
A constant literal expression.
Definition: std_expr.h:2753
binary_exprt::op1
exprt & op1()
Definition: expr.h:102
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
goto_programt::targett
instructionst::iterator targett
Definition: goto_program.h:646
class_hierarchyt::get_children_trans
idst get_children_trans(const irep_idt &id) const
Definition: class_hierarchy.h:68
binary_exprt::op0
exprt & op0()
Definition: expr.h:99
remove_instanceoft::lower_instanceof
bool lower_instanceof(const irep_idt &function_identifier, goto_programt &)
Replace every instanceof in the passed function body with an explicit class-identifier test.
Definition: remove_instanceof.cpp:269