plang.cc
1 // Copyright 2013, Beeri 15. All rights reserved.
2 // Author: Roman Gershman (romange@gmail.com)
3 //
4 #include "util/plang/plang.h"
5 
6 #include <regex>
7 
8 #include "absl/strings/ascii.h"
9 #include "base/logging.h"
10 #include "strings/hash.h"
11 #include "util/math/mathutil.h"
12 
13 #include <google/protobuf/message.h>
14 #include <google/protobuf/repeated_field.h>
15 
16 using std::string;
17 using namespace std::placeholders;
18 using strings::AsString;
19 
20 namespace plang {
21 
22 typedef std::pair<const gpb::Message*, const gpb::FieldDescriptor*> MsgDscrPair;
23 typedef std::vector<std::tuple<uint32, int, MsgDscrPair>> PathState;
24 
25 static const gpb::Message* AdvanceState(PathState* state) {
26  while (!state->empty()) {
27  auto& b = state->back();
28  int index = ++std::get<1>(b);
29  MsgDscrPair& result = std::get<2>(b);
30  CHECK(result.second->is_repeated());
31  const gpb::Reflection* refl = result.first->GetReflection();
32  if (index >= refl->FieldSize(*result.first, result.second)) {
33  state->pop_back();
34  } else {
35  return &refl->GetRepeatedMessage(*result.first, result.second, index);
36  }
37  }
38  return nullptr;
39 }
40 
41 static void RetrieveNode(const gpb::Message* msg, StringPiece path,
42  std::function<void(const MsgDscrPair&)> cb) {
43  MsgDscrPair result(msg, nullptr);
44  uint32 start = 0;
45 
46  // path index, array index, msg, fd
47  PathState state;
48  while (result.first != nullptr) {
49  size_t next = path.find('.', start);
50  StringPiece part = path.substr(start, next - start);
51  VLOG(2) << "Looking for " << part << " in " << result.first->GetDescriptor()->name()
52  << " next= " << next;
53  result.second = result.first->GetDescriptor()->FindFieldByName(AsString(part));
54  CHECK(result.second != nullptr) << "Could not find field " << part;
55  if (next == string::npos) {
56  cb(result);
57  if ((result.first = AdvanceState(&state)) != nullptr) {
58  start = std::get<0>(state.back());
59  }
60  continue;
61  }
62  CHECK_EQ(result.second->cpp_type(), gpb::FieldDescriptor::CPPTYPE_MESSAGE)
63  << part << " is not a message.";
64  const gpb::Reflection* refl = result.first->GetReflection();
65  start = next + 1;
66  if (result.second->is_repeated()) {
67  if (refl->FieldSize(*result.first, result.second) > 0) {
68  state.push_back(std::make_tuple(start, 0, result));
69  result.first = &refl->GetRepeatedMessage(*result.first, result.second, 0);
70  } else {
71  if ((result.first = AdvanceState(&state)) != nullptr) {
72  start = std::get<0>(state.back());
73  }
74  }
75  } else {
76  result.first = &refl->GetMessage(*result.first, result.second);
77  }
78  }
79 }
80 
81 double ExprValue::PromoteToDouble() const {
82  switch (type) {
83  case CPPTYPE_INT64:
84  return val.int_val;
85  case CPPTYPE_DOUBLE:
86  return val.d_val;
87  case CPPTYPE_UINT64:
88  return val.uint_val;
89  default:
90  LOG(FATAL) << "Not supported " << type;
91  }
92  return 0;
93 }
94 
95 bool ExprValue::Equal(const ExprValue& other) const {
96  CppType t1 = type, t2 = other.type;
97  if (t1 == t2) {
98  switch (t1) {
99  case CPPTYPE_INT64:
100  return val.int_val == other.val.int_val;
101  case CPPTYPE_STRING:
102  return val.str == other.val.str;
103  case CPPTYPE_DOUBLE:
104  return MathUtil::AlmostEquals(val.d_val, other.val.d_val);
105  case CPPTYPE_ENUM:
106  return val.enum_val == other.val.enum_val;
107  default:
108  LOG(FATAL) << "Not supported " << type;
109  }
110  }
111  if (t1 == CPPTYPE_ENUM) {
112  switch (t2) {
113  case CPPTYPE_INT64:
114  return val.enum_val->number() == other.val.int_val;
115  case CPPTYPE_STRING:
116  return val.enum_val->name() == AsString(other.val.str);
117  default:
118  LOG(FATAL) << "Unsupported type for comparing with enum " << t2;
119  }
120  }
121  if (t1 == CPPTYPE_DOUBLE && t2 == CPPTYPE_INT64) {
122  return MathUtil::AlmostEquals(val.d_val, double(other.val.int_val));
123  }
124  if (t2 == CPPTYPE_ENUM || t2 == CPPTYPE_DOUBLE) {
125  return other.Equal(*this);
126  }
127 
128  if (t1 <= 4) { // integer values
129  CHECK_LE(other.type, 4);
130  CHECK_EQ(0, t1 % 2);
131  CHECK_EQ(0, t2 % 2);
132  if (t1 != t2) {
133  if (t1 == CPPTYPE_INT64) {
134  if (val.int_val < 0)
135  return false; // the other value is unsigned so it's different.
136  return uint64(val.int_val) == other.val.uint_val;
137  }
138  DCHECK_EQ(CPPTYPE_INT64, t2);
139  if (other.val.int_val < 0)
140  return false;
141  return uint64(other.val.int_val) == val.uint_val;
142  }
143  }
144  LOG(FATAL) << "Unsupported combination " << t1 << " and " << t2;
145  return false;
146 }
147 
148 bool ExprValue::Less(const ExprValue& other) const {
149  CppType t1 = type, t2 = other.type;
150  CHECK_LE(t1, 5);
151  CHECK_LE(t2, 5);
152  if (t1 != t2) {
153  if (t1 == CPPTYPE_DOUBLE || t2 == CPPTYPE_DOUBLE) {
154  double d1 = PromoteToDouble();
155  double d2 = other.PromoteToDouble();
156  return d1 < d2;
157  }
158  if (t1 == CPPTYPE_INT64) {
159  if (val.int_val < 0)
160  return true; // the other value is unsigned so it's bigger.
161  return uint64(val.int_val) < other.val.uint_val;
162  }
163  DCHECK_EQ(CPPTYPE_INT64, t2);
164  if (other.val.int_val <= 0)
165  return false;
166  return val.uint_val < uint64(other.val.int_val);
167  }
168  switch (t1) {
169  case CPPTYPE_INT64:
170  return val.int_val < other.val.int_val;
171  case CPPTYPE_UINT64:
172  return val.uint_val < other.val.uint_val;
173  case CPPTYPE_DOUBLE:
174  return val.d_val < other.val.d_val;
175  default:
176  LOG(FATAL) << "Not supported " << type;
177  }
178  return false;
179 }
180 
181 bool ExprValue::RLike(const ExprValue& other) const {
182  CppType t1 = type, t2 = other.type;
183  CHECK(t1 == CPPTYPE_STRING && t2 == CPPTYPE_STRING);
184 
185  // TODO(ORI): Currently we compile the regex over and over again, it would be wiser to
186  // only compile it once when the regex is a constant.
187  return std::regex_match(val.str.begin(), val.str.end(),
188  std::regex(other.val.str.begin(), other.val.str.end()));
189 }
190 
191 static void EvalField(Expr::ExprValueCb cb, MsgDscrPair msg_dscr) {
192  const gpb::Message* pmsg = msg_dscr.first;
193  const gpb::FieldDescriptor* fd = msg_dscr.second;
194  const gpb::Reflection* refl = pmsg->GetReflection();
195 
196  typedef gpb::FieldDescriptor FD;
197  if (fd->is_repeated()) {
198  switch (fd->cpp_type()) {
199  case FD::CPPTYPE_INT32: {
200  const auto& arr = refl->GetRepeatedField<int32>(*pmsg, fd);
201  for (int32 val : arr) {
202  cb(ExprValue::fromInt(val));
203  }
204  }
205  return;
206  case FD::CPPTYPE_UINT32: {
207  const auto& arr = refl->GetRepeatedField<uint32>(*pmsg, fd);
208  for (uint32 val : arr) {
209  cb(ExprValue::fromInt(val));
210  }
211  }
212  return;
213  default:
214  LOG(FATAL) << "Not supported repeated " << fd->cpp_type_name();
215  }
216  }
217  switch (fd->cpp_type()) {
218  case FD::CPPTYPE_INT32:
219  cb(ExprValue::fromInt(refl->GetInt32(*pmsg, fd)));
220  return;
221  case FD::CPPTYPE_UINT32:
222  cb(ExprValue::fromUInt(refl->GetUInt32(*pmsg, fd)));
223  return;
224  case FD::CPPTYPE_INT64:
225  cb(ExprValue::fromInt(refl->GetInt64(*pmsg, fd)));
226  return;
227  case FD::CPPTYPE_UINT64:
228  cb(ExprValue::fromUInt(refl->GetUInt64(*pmsg, fd)));
229  return;
230  case FD::CPPTYPE_STRING: {
231  string tmp;
232  cb(ExprValue(refl->GetStringReference(*pmsg, fd, &tmp)));
233  return;
234  }
235  case FD::CPPTYPE_FLOAT:
236  cb(ExprValue::fromDouble(refl->GetFloat(*pmsg, fd)));
237  return;
238  case FD::CPPTYPE_DOUBLE:
239  cb(ExprValue::fromDouble(refl->GetDouble(*pmsg, fd)));
240  return;
241  case FD::CPPTYPE_BOOL:
242  cb(ExprValue::fromInt(refl->GetBool(*pmsg, fd)));
243  return;
244  case FD::CPPTYPE_ENUM:
245  cb(ExprValue(refl->GetEnum(*pmsg, fd)));
246  return;
247  default:
248  LOG(FATAL) << "Not supported yet " << fd->cpp_type_name();
249  }
250 }
251 
252 void StringTerm::eval(const gpb::Message& msg, ExprValueCb cb) const {
253  if (type_ == CONST) {
254  cb(ExprValue(val_));
255  return;
256  }
257  RetrieveNode(&msg, val_, std::bind(&EvalField, cb, _1));
258 }
259 
// Returns true if `t` compares equal (operator==) to `u`. Base case of the variadic
// overload below.
//
// The second parameter is a forwarding reference so that it binds to lvalues as well as
// rvalues: the variadic overload recurses with named (lvalue) arguments, which a
// `const U&&` parameter would reject.
template <typename T, typename U> bool IsOneOf(T&& t, U&& u) {
  return t == u;
}

// Returns true if `t` compares equal to at least one of the remaining arguments.
// Candidates are tested left to right and evaluation stops at the first match.
template <typename T, typename U1, typename... U2> bool IsOneOf(T&& t, U1&& u, U2&&... rest) {
  return t == u || IsOneOf(t, rest...);
}
268 
269 void BinOp::eval(const gpb::Message& msg, ExprValueCb cb) const {
270  bool res = false;
271  Expr::ExprValueCb local_cb;
272  switch (type_) {
273  case EQ:
274  local_cb = [this, &res, &msg](const ExprValue& val_left) {
275  right_->eval(msg, [&val_left, &res](const ExprValue& val_right) {
276  if (val_left.Equal(val_right))
277  res = true;
278  return !res;
279  });
280  return !res;
281  };
282  break;
283  case RLIKE:
284  local_cb = [this, &res, &msg](const ExprValue& val_left) {
285  right_->eval(msg, [&val_left, &res](const ExprValue& val_right) {
286  if (val_left.RLike(val_right))
287  res = true;
288  return !res;
289  });
290  return !res;
291  };
292  break;
293  case AND:
294  local_cb = [this, &res, &msg](const ExprValue& val_left) {
295  CHECK_EQ(ExprValue::CPPTYPE_BOOL, val_left.type);
296  if (!val_left.val.bool_val)
297  return true; // continue
298 
299  right_->eval(msg, [&res](const ExprValue& val_right) {
300  CHECK_EQ(ExprValue::CPPTYPE_BOOL, val_right.type);
301  if (!val_right.val.bool_val)
302  return true;
303  res = true;
304  return false;
305  });
306  return !res;
307  };
308  break;
309  case OR:
310  local_cb = [this, &res, &msg](const ExprValue& val_left) {
311  CHECK_EQ(ExprValue::CPPTYPE_BOOL, val_left.type);
312  if (val_left.val.bool_val) {
313  res = true;
314  return false; // continue
315  }
316  right_->eval(msg, [&res](const ExprValue& val_right) {
317  CHECK_EQ(ExprValue::CPPTYPE_BOOL, val_right.type);
318  if (!val_right.val.bool_val)
319  return true;
320  res = true;
321  return false;
322  });
323  return !res;
324  };
325  break;
326  case LT:
327  local_cb = [this, &res, &msg](const ExprValue& val_left) {
328  right_->eval(msg, [&val_left, &res](const ExprValue& val_right) {
329  if (val_left.Less(val_right))
330  res = true;
331  return !res;
332  });
333  return !res;
334  };
335  break;
336  case LE:
337  local_cb = [&](const ExprValue& val_left) {
338  right_->eval(msg, [&val_left, &res](const ExprValue& val_right) {
339  if (val_left.Less(val_right) || val_left.Equal(val_right))
340  res = true;
341  return !res;
342  });
343  return !res;
344  };
345  break;
346  case NOT:
347  local_cb = [&](const ExprValue& val_left) {
348  CHECK_EQ(ExprValue::CPPTYPE_BOOL, val_left.type);
349  if (!val_left.val.bool_val) {
350  res = true;
351  }
352  return !res;
353  };
354  break;
355  }
356  left_->eval(msg, local_cb);
357  cb(ExprValue::fromBool(res));
358 }
359 
360 FunctionTerm::FunctionTerm(const std::string& name, ArgList&& lst)
361  : name_(name), args_(std::move(lst)) {
362  absl::AsciiStrToLower(&name_);
363 }
364 
365 FunctionTerm::~FunctionTerm() {
366  for (auto e : args_)
367  delete e;
368 }
369 
370 void FunctionTerm::eval(const gpb::Message& msg, ExprValueCb cb) const {
371  if (name_ == "hash") {
372  if (args_.size() != 1)
373  LOG(FATAL) << "hash() accepts a single argument";
374  size_t res;
375  args_[0]->eval(msg, [&res](const ExprValue& val) {
376  CHECK_EQ(ExprValue::CPPTYPE_STRING, val.type);
377  res = std::hash<StringPiece>()(val.val.str);
378  return false; // This will only work on the first value of a repeated field
379  });
380  cb(ExprValue::fromUInt(res));
381  } else {
382  LOG(FATAL) << "Unknown function";
383  }
384 }
385 
386 static void IsDefField(Expr::ExprValueCb cb, MsgDscrPair msg_dscr) {
387  const gpb::Message* pmsg = msg_dscr.first;
388  const gpb::FieldDescriptor* fd = msg_dscr.second;
389  const gpb::Reflection* refl = pmsg->GetReflection();
390  bool res = fd->is_repeated() ? refl->FieldSize(*pmsg, fd) > 0 : refl->HasField(*pmsg, fd);
391  cb(ExprValue::fromBool(res));
392 }
393 
394 void IsDefFun::eval(const gpb::Message& msg, ExprValueCb cb) const {
395  VLOG(1) << "IsDefFun " << name_;
396  RetrieveNode(&msg, name_, std::bind(&IsDefField, cb, _1));
397 }
398 
399 bool EvaluateBoolExpr(const Expr& e, const gpb::Message& msg) {
400  bool res = false;
401  e.eval(msg, [&res](const plang::ExprValue& val) {
402  CHECK_EQ(ExprValue::CPPTYPE_BOOL, val.type);
403  res = val.val.bool_val;
404  return false;
405  });
406  return res;
407 }
408 
409 } // namespace plang