4 #include "util/pprint/pprint_utils.h" 7 #include <unordered_map> 8 #include <glog/stl_logging.h> 9 #include <google/protobuf/compiler/importer.h> 10 #include <google/protobuf/dynamic_message.h> 12 #include "base/flags.h" 13 #include "base/logging.h" 14 #include "strings/escaping.h" 15 #include "strings/numbers.h" 16 #include "strings/split.h" 17 #include "strings/strcat.h" 19 DEFINE_bool(
// --short: forwarded to the text printer's SetSingleLineMode() in the Printer
// constructor; Printer::Output() also uses it to pick " " instead of a newline
// after the opening brace. The flag itself carries no help text.
short,
false,
"");
// --csv: list of field paths to print as CSV columns. The Printer constructor
// splits the value on "," (skipping whitespace-only pieces) and builds one
// FdPath per piece; FdPath splits each piece on "." and resolves components by
// tag number (a by-name lookup is also present in FdPath's constructor).
20 DEFINE_string(csv,
"",
"comma delimited list of tag numbers. For repeated fields, it's possible " 21 "to add :[delimiting char] after a tag number.");
// --use_csv_null: consulted in Printer::PrintValueRecur for a non-repeated leaf
// (item_index == -1) whose field is not set on its parent message.
// NOTE(review): the action taken in that case is not visible in this excerpt;
// per the help text it is presumably emitting the \N marker - confirm.
22 DEFINE_bool(use_csv_null,
true,
// --aggregate_repeated (defined on the next line): checked in
// FdPath::ExtractValueRecur when the last path component is a repeated field.
"When printing csv format use \\N for outputing undefined " 24 DEFINE_bool(aggregate_repeated,
false,
"When printing csv format, aggregate repeated leaves in one " 25 "line: \"xx,yy,..\"");
// --omit_blobs: when set, the custom string printer (PrintString) replaces any
// string value longer than 100 bytes that contains a char comparing below 32
// with the placeholder "Not work safe!". The flag itself carries no help text.
27 DEFINE_bool(omit_blobs,
true,
"");
// --skip_value_escaping: when set, the custom string printer (PrintString) uses
// the raw value instead of passing it through absl::Utf8SafeCEscape().
// The flag itself carries no help text.
28 DEFINE_bool(skip_value_escaping,
false,
"");
// --root_node: optional field path, resolved with FdPath against the top-level
// descriptor in the Printer constructor. The constructor CHECK-fails unless the
// path is valid and its last component is a message-typed field.
// The flag itself carries no help text.
29 DEFINE_string(root_node,
"",
"");
// --omit_double_quotes: consulted by the custom string printer (PrintString)
// and by Printer::PrintValueRecur when it builds an aggregated repeated value.
30 DEFINE_bool(omit_double_quotes,
false,
"Omits double quotes when printing string values");
// Resolves a "."-separated field path against `root` and appends the resulting
// chain of FieldDescriptors to path_.
//
// root: descriptor of the message type the path starts from.
// path: "."-separated components; a CHECK fires if splitting yields nothing or
//       if any component cannot be resolved to a field.
41 FdPath::FdPath(
const gpb::Descriptor* root, StringPiece path) {
42 std::vector<StringPiece> parts = absl::StrSplit(path,
".");
43 CHECK(!parts.empty()) << path;
44 const gpb::Descriptor* cur_descr = root;
45 for (
size_t j = 0; j < parts.size(); ++j) {
46 const gpb::FieldDescriptor* field =
nullptr;
// A component that parses as an unsigned 32-bit integer is looked up as a tag
// number.
49 if (safe_strtou32(parts[j], &tag_id)) {
50 field = cur_descr->FindFieldByNumber(tag_id);
// NOTE(review): the branch structure is not visible in this excerpt;
// presumably the by-name lookup below is the fallback for non-numeric
// components - confirm.
52 string tmp(parts[j].data(), parts[j].size());
53 field = cur_descr->FindFieldByName(tmp);
56 CHECK(field) <<
"Can not find tag id " << parts[j];
// Every component except the last must be a message field so that the walk can
// descend into its type.
57 if (j + 1 < parts.size()) {
58 CHECK_EQ(field->cpp_type(), gpb::FieldDescriptor::CPPTYPE_MESSAGE);
59 cur_descr = field->message_type();
61 path_.push_back(field);
// Iterates over the field descriptors stored in path_.
// NOTE(review): the loop body and return statements are not visible in this
// excerpt; presumably this reports whether any component of the path is a
// repeated field - confirm.
65 bool FdPath::IsRepeated()
const {
66 for (
auto v : path_) {
// Recursive step of value extraction: handles path component `index` on `msg`
// and descends into sub-messages until the last component is reached.
//
// msg:   message that owns the field path_[index].
// index: position in path_ being processed; must be < path_.size().
// cb:    callback passed unchanged to every recursive call.
// NOTE(review): the lines that invoke `cb` at the leaf are not visible in this
// excerpt.
74 void FdPath::ExtractValueRecur(
const gpb::Message& msg, uint32 index, ValueCb cb)
const {
75 CHECK_LT(index, path_.size());
76 auto fd = path_[index];
77 const gpb::Reflection* reflection = msg.GetReflection();
// Count the repeated fields that precede this component; the count is used as
// an index into cur_repeated_stack_ below.
78 uint32 cur_repeated_depth = 0;
79 for (uint32 i = 0; i < index; ++i) {
80 if (path_[i]->is_repeated()) ++cur_repeated_depth;
82 if (fd->is_repeated()) {
83 int sz = reflection->FieldSize(msg, fd);
85 if (index + 1 < path_.size()) {
// An element index is already recorded for this repeated level: follow only
// that element.
87 if (cur_repeated_depth < cur_repeated_stack_.size()) {
88 const gpb::Message& new_msg =
89 reflection->GetRepeatedMessage(msg, fd, cur_repeated_stack_[cur_repeated_depth]);
90 ExtractValueRecur(new_msg, index + 1, cb);
// Visit every element, recording its index on cur_repeated_stack_ for the
// duration of the recursive call.
// NOTE(review): this is a const method that mutates cur_repeated_stack_, so the
// member is presumably declared mutable - confirm in the header.
92 for (
int i = 0; i < sz; ++i) {
93 cur_repeated_stack_.push_back(i);
94 const gpb::Message& new_msg = reflection->GetRepeatedMessage(msg, fd, i);
95 ExtractValueRecur(new_msg, index + 1, cb);
96 cur_repeated_stack_.pop_back();
// Repeated leaf: handling depends on --aggregate_repeated (bodies not visible
// in this excerpt).
102 if (FLAGS_aggregate_repeated) {
105 for (
int i = 0; i < sz; ++i) {
// Singular intermediate message field: descend into it.
114 if (index + 1 < path_.size()) {
115 const gpb::Message& new_msg = reflection->GetMessage(msg, fd);
116 ExtractValueRecur(new_msg, index + 1, cb);
// File-local descriptor database and the pool built on top of it. File
// descriptors are added to proto_db by AllocateMsgByMeta(), and message types
// are looked up through proto_db_pool.
128 static gpb::SimpleDescriptorDatabase proto_db;
129 static gpb::DescriptorPool proto_db_pool(&proto_db);
// Creates a message instance for the fully-qualified type name `type`, using
// the serialized FileDescriptorSet `fd_set` to populate the file-local pool.
//
// type:   message type name; must be non-empty.
// fd_set: serialized gpb::FileDescriptorSet; must be non-empty and parseable.
// Returns the result of AllocateMsgFromDescr() for the resolved descriptor.
// CHECK-fails if the type cannot be found in the pool.
// NOTE(review): the condition guarding the registration below is not visible
// in this excerpt; presumably files are registered only when the first lookup
// fails - confirm.
131 gpb::Message* AllocateMsgByMeta(
const string& type,
const string& fd_set) {
132 CHECK(!type.empty());
133 CHECK(!fd_set.empty());
136 const gpb::Descriptor* descriptor = proto_db_pool.FindMessageTypeByName(type);
138 gpb::FileDescriptorSet fd_set_proto;
139 CHECK(fd_set_proto.ParseFromString(fd_set));
140 for (
int i = 0; i < fd_set_proto.file_size(); ++i) {
147 CHECK(proto_db.Add(fd_set_proto.file(i)));
// Look the type up again now that files have been added to the database.
156 descriptor = proto_db_pool.FindMessageTypeByName(type);
159 CHECK(descriptor) <<
"Can not find " << type <<
" in the proto pool.";
160 return AllocateMsgFromDescr(descriptor);
163 gpb::Message* AllocateMsgFromDescr(
const gpb::Descriptor* descr) {
164 static gpb::DynamicMessageFactory message_factory(&proto_db_pool);
165 message_factory.SetDelegateToGeneratedFactory(
true);
167 const gpb::Message* msg_proto = message_factory.GetPrototype(descr);
168 CHECK_NOTNULL(msg_proto);
169 return msg_proto->New();
172 PathNode* PathNode::AddChild(
const gpb::FieldDescriptor* fd) {
173 for (PathNode& n : children) {
174 if (n.fd == fd)
return &n;
176 children.push_back(PathNode(fd));
177 return &children.back();
// Formats a string field value for text output.
//
// With --omit_blobs, values longer than 100 bytes that contain at least one
// char comparing below 32 are replaced by the placeholder "Not work safe!".
// Otherwise the value is C-escaped (UTF-8 safe) unless --skip_value_escaping is
// set, and wrapped in double quotes; --omit_double_quotes selects a different
// return whose statement is not visible in this excerpt.
182 virtual string PrintString(
const string& val)
const override {
183 if (FLAGS_omit_blobs) {
// NOTE(review): `c < 32` compares a plain char. Where char is signed, bytes
// >= 0x80 (e.g. non-ASCII UTF-8) are negative and also match, so long
// non-ASCII text is treated as a blob - confirm that this is intended.
184 if (val.size() > 100 && std::any_of(val.begin(), val.end(),
185 [](
char c) {
return c < 32; })) {
186 return "\"Not work safe!\"";
// Binding to const& keeps the conditional's temporary alive for the rest of
// the scope.
189 const string& val2 = FLAGS_skip_value_escaping ? val : absl::Utf8SafeCEscape(val);
190 if (FLAGS_omit_double_quotes) {
193 return absl::StrCat(
"\"", val2,
"\"");
// Walks every field of `descriptor`, recursing into message-typed fields, and
// registers on `printer` the field value printer produced by `pred` for a
// field.
//
// descriptor:  message type to scan; must not be null.
// pred:        callback returning a custom FieldValuePrinter for a field.
// fo_tags_map: passed through to recursive calls; no other use is visible in
//              this excerpt.
// printer:     text-format printer that receives the registrations.
// NOTE(review): the recursion shown here has no visited-set, so a message type
// that (transitively) contains itself would recurse without bound unless a
// guard exists in the lines missing from this excerpt - confirm.
// NOTE(review): any null check on `custom` before registration is not visible
// here.
197 void RegisterCustomFieldPrinter(
198 const gpb::Descriptor* descriptor, Printer::FieldPrinterPredicate pred,
199 const std::unordered_map<int, const gpb::FieldDescriptor*>& fo_tags_map,
200 gpb::TextFormat::Printer* printer) {
201 CHECK_NOTNULL(descriptor);
203 for (
int i = 0; i < descriptor->field_count(); ++i) {
204 const gpb::FieldDescriptor* fd = descriptor->field(i);
206 if (fd->cpp_type() == gpb::FieldDescriptor::CPPTYPE_MESSAGE) {
207 RegisterCustomFieldPrinter(fd->message_type(), pred, fo_tags_map, printer);
210 gpb::TextFormat::FieldValuePrinter* custom = pred(*fd);
212 printer->RegisterFieldValuePrinter(fd, custom);
// Configures a printer for messages of type `descriptor`.
//
// descriptor: top-level message type; its full name is stored in type_name_.
// pred:       forwarded to RegisterCustomFieldPrinter() to supply custom
//             per-field value printers.
//
// Reads --csv, --short and --root_node. Some statements of this constructor
// are not visible in this excerpt.
217 Printer::Printer(
const gpb::Descriptor* descriptor, FieldPrinterPredicate pred)
218 : type_name_(descriptor->full_name()) {
// Text-format setup: custom string printer as default, compact repeated
// primitives.
219 printer_.SetDefaultFieldValuePrinter(
new BetterPrinter());
220 printer_.SetUseShortRepeatedPrimitives(
true);
// Each non-blank, comma-separated piece of --csv is one column path.
223 std::vector<StringPiece> tags = absl::StrSplit(FLAGS_csv,
",", absl::SkipWhitespace());
225 printer_.SetInitialIndentLevel(1);
226 printer_.SetSingleLineMode(FLAGS_short);
// --root_node must resolve to a valid path whose last component is a message.
227 if (!FLAGS_root_node.empty()) {
228 root_path_ = FdPath{descriptor, FLAGS_root_node};
229 CHECK(root_path_.valid());
230 const gpb::FieldDescriptor* fd = root_path_.path().back();
231 CHECK_EQ(gpb::FieldDescriptor::CPPTYPE_MESSAGE, fd->cpp_type());
// Resolve every column path, mirror its fields into the tree rooted at root_,
// and keep the resolved path in fds_.
234 for (StringPiece tag_path : tags) {
235 FdPath fd_path(descriptor, tag_path);
236 PathNode* cur_node = &root_;
237 for (
const gpb::FieldDescriptor* fd: fd_path.path()) {
238 cur_node = cur_node->AddChild(fd);
240 fds_.push_back(std::move(fd_path));
// Locate the FieldOptions descriptor, preferring the file-local pool and
// falling back to the generated pool.
244 const gpb::Descriptor* fo_descr_root =
245 proto_db_pool.FindMessageTypeByName(
"google.protobuf.FieldOptions");
246 if (fo_descr_root ==
nullptr) {
247 fo_descr_root = gpb::DescriptorPool::generated_pool()
248 ->FindMessageTypeByName(
"google.protobuf.FieldOptions");
251 CHECK_NOTNULL(fo_descr_root);
// Index all FieldOptions extensions known to the file-local pool by their
// field number.
253 std::unordered_map<int, const gpb::FieldDescriptor*> fo_tags_map;
254 vector<const gpb::FieldDescriptor*> fields;
255 proto_db_pool.FindAllExtensions(fo_descr_root, &fields);
257 for (
const gpb::FieldDescriptor* fl : fields) {
258 fo_tags_map[fl->number()] = fl;
262 RegisterCustomFieldPrinter(descriptor, pred, fo_tags_map, &printer_);
264 google::FlushLogFiles(google::GLOG_INFO);
// Prints `msg` to standard output.
//
// Two output forms are visible: a text-format dump wrapped as
// "<type_name> { ... }" (with " " or a newline after the opening brace,
// depending on --short), and a CSV form produced by PrintValueRecur().
// NOTE(review): the condition selecting between the two is not visible in this
// excerpt; presumably CSV is used when column paths were configured - confirm.
268 void Printer::Output(
const gpb::Message& msg)
const {
271 CHECK(printer_.PrintToString(msg, &text_output));
272 std::cout << type_name_ <<
" {" << (FLAGS_short ?
" " :
"\n")
273 << text_output <<
"}\n";
275 PrintValueRecur(0,
"",
false, msg);
// Builds CSV rows column by column: extracts the values for column
// `path_index` from `msg` and, for each value, either prints the finished row
// (last column) or recurses into the next column.
//
// path_index: index into fds_ of the column being produced; must be in range.
// prefix:     row text accumulated from the previous columns.
// has_value:  whether any previous column produced a non-empty value.
// msg:        top-level message that values are extracted from.
279 void Printer::PrintValueRecur(
size_t path_index,
const string& prefix,
280 bool has_value,
const gpb::Message& msg)
const {
281 CHECK_LT(path_index, fds_.size());
// The callback captures `prefix` and `msg` by reference; it is handed to
// ExtractValue() at the end of this function.
282 auto cb_fun = [path_index,
this, has_value, &prefix, &msg](
285 const gpb::Message& parent,
const gpb::FieldDescriptor* fd,
int item_index,
int num_items) {
287 CHECK_NE(num_items, 0);
// num_items != -1: all items of a repeated field are joined into one value,
// each followed by ",".
289 if (num_items != -1) {
290 if (!FLAGS_omit_double_quotes)
292 for (
int i=0; i < num_items; i++) {
294 printer_.PrintFieldValueToString(parent, fd, i, &repeated_val);
295 absl::StrAppend(&val, repeated_val,
",");
297 if (FLAGS_omit_double_quotes)
// Single value: item_index == -1 denotes a non-repeated field.
302 printer_.PrintFieldValueToString(parent, fd, item_index, &val);
303 if (item_index == -1) {
304 const gpb::Reflection* reflection = parent.GetReflection();
// NOTE(review): the statement executed for an unset field is not visible in
// this excerpt.
305 if (FLAGS_use_csv_null && !reflection->HasField(parent, fd)) {
// The first column starts the row; later columns are appended after a comma.
311 string next_val = (path_index == 0) ? val : StrCat(prefix,
",", val);
312 bool next_has_value = has_value | !val.empty();
313 if (path_index + 1 == fds_.size()) {
// std::endl flushes standard output after every row.
315 cout << next_val << std::endl;
317 PrintValueRecur(path_index + 1, next_val, next_has_value, msg);
320 fds_[path_index].ExtractValue(msg, cb_fun);
// Shorthand used by the type switch in PrintBqSchemaInternal().
323 using FD = gpb::FieldDescriptor;
// Writes to standard output a JSON-like schema entry
// ({ "name": ..., "type": ... [, "mode": ...] }) for every field of `descr`,
// recursing into message-typed fields as RECORD entries with nested "fields".
//
// offset:  number of spaces written before each entry; nested records use
//          offset + 2.
// descr:   message type whose fields are listed.
// options: optional callbacks. field_name_cb overrides the field name;
//          type_name_cb overrides the type name, and the built-in mapping from
//          cpp_type() is used only when it yields an empty string.
// The surrounding brackets, separators and the per-type strings are emitted by
// lines that are not visible in this excerpt.
325 static void PrintBqSchemaInternal(
unsigned offset,
const gpb::Descriptor* descr,
326 const PrintBqSchemaOptions& options) {
// Set after the first field is seen; presumably controls the separator written
// between entries (that statement is not visible here).
328 bool continuation_field =
false;
329 for (
int i = 0; i < descr->field_count(); ++i) {
330 const gpb::FieldDescriptor* fd = descr->field(i);
331 string fname = options.field_name_cb ? options.field_name_cb(*fd) : fd->name();
335 if (continuation_field) {
339 continuation_field =
true;
340 cout << string(offset,
' ') << R
"( { "name": ")" << fname << R"(", "type": ")"; 341 const string& type_name = options.type_name_cb ? options.type_name_cb(*fd) :
string{};
343 if (type_name.empty()) {
344 switch (fd->cpp_type()) {
345 case FD::CPPTYPE_INT32:
346 case FD::CPPTYPE_UINT32:
347 case FD::CPPTYPE_INT64:
348 case FD::CPPTYPE_UINT64:
351 case FD::CPPTYPE_BOOL:
355 case FD::CPPTYPE_STRING:
358 case FD::CPPTYPE_DOUBLE:
359 case FD::CPPTYPE_FLOAT:
362 case FD::CPPTYPE_ENUM:
365 case FD::CPPTYPE_MESSAGE:
366 cout << R
"(RECORD", "fields": )"; 367 PrintBqSchemaInternal(offset + 2, fd->message_type(), options); 368 cout << string(offset + 4, ' ');
371 LOG(FATAL) <<
" not supported " << fd->cpp_type_name();
374 cout << type_name <<
"\"";
376 if (fd->is_repeated()) {
377 cout << R
"(, "mode": "REPEATED")"; 378 } else if (fd->is_required()) {
// PrintBqSchema (defined on the next line) is the public entry point: it
// prints the schema for `descr` starting at offset 0.
379 cout << R
"(, "mode": "REQUIRED")"; 386 void PrintBqSchema(
const gpb::Descriptor* descr,
const PrintBqSchemaOptions& options) {
387 PrintBqSchemaInternal(0, descr, options);
390 static std::vector<const gpb::FieldDescriptor *> ListFields(
const gpb::Message &msg) {
391 std::vector<const gpb::FieldDescriptor *> initialized_fields;
392 msg.GetReflection()->ListFields(msg, &initialized_fields);
393 return initialized_fields;
// Estimates the payload size in bytes of `field` within `msg`.
//
// 64-bit numeric types (including double) count 8 bytes per element; 32-bit
// numeric types, float and enum count 4 bytes per element; string and bytes
// fields count the length of their contents (summed over all elements when
// repeated). The element count is FieldSize() for repeated fields, else 1.
// NOTE(review): the value returned for TYPE_BOOL and the switch's default label
// are not visible in this excerpt; the LOG(FATAL) at the end is presumably
// reached for the remaining types - confirm.
396 static size_t GetSize(
const gpb::Message &msg,
397 const gpb::FieldDescriptor *field) {
398 const gpb::Reflection *reflect = msg.GetReflection();
399 const size_t field_size = field->is_repeated() ? reflect->FieldSize(msg, field) : 1;
402 switch (field->type()) {
403 case gpb::FieldDescriptor::TYPE_DOUBLE:
404 case gpb::FieldDescriptor::TYPE_INT64:
405 case gpb::FieldDescriptor::TYPE_UINT64:
406 case gpb::FieldDescriptor::TYPE_FIXED64:
407 case gpb::FieldDescriptor::TYPE_SFIXED64:
408 case gpb::FieldDescriptor::TYPE_SINT64:
409 return 8 * field_size;
410 case gpb::FieldDescriptor::TYPE_FLOAT:
411 case gpb::FieldDescriptor::TYPE_INT32:
412 case gpb::FieldDescriptor::TYPE_UINT32:
413 case gpb::FieldDescriptor::TYPE_FIXED32:
414 case gpb::FieldDescriptor::TYPE_SFIXED32:
415 case gpb::FieldDescriptor::TYPE_SINT32:
416 case gpb::FieldDescriptor::TYPE_ENUM:
417 return 4 * field_size;
418 case gpb::FieldDescriptor::TYPE_BOOL:
420 case gpb::FieldDescriptor::TYPE_STRING:
421 case gpb::FieldDescriptor::TYPE_BYTES: {
423 if (field->is_repeated()) {
425 for (
size_t i = 0; i < field_size; ++i)
426 sum += reflect->GetRepeatedStringReference(msg, field, i, &temp).size();
429 return reflect->GetStringReference(msg, field, &temp).size();
433 LOG(FATAL) <<
" not supported " << field->type();
// Builds a trie that mirrors the field layout of `descr`: one child per field,
// stored at the field's position within the descriptor and labelled with the
// field name. Message-typed fields get a recursively built subtree.
// NOTE(review): the declaration of `trie`, the branch keyword between the two
// Put() calls and the return statement are not visible in this excerpt.
// NOTE(review): the recursion shown has no guard against message types that
// (transitively) contain themselves - confirm that such schemas cannot reach
// this function.
438 static SizeSummarizer::Trie FillTrie(
const gpb::Descriptor *descr) {
439 using Trie = SizeSummarizer::Trie;
441 trie.Resize(descr->field_count());
442 for (
int i = 0; i < descr->field_count(); ++i) {
443 if (descr->field(i)->type() == gpb::FieldDescriptor::TYPE_MESSAGE)
444 trie.Put(i, std::unique_ptr<Trie>(
new Trie(FillTrie(descr->field(i)->message_type()))));
446 trie.Put(i, std::unique_ptr<Trie>(
new Trie));
447 trie.Get(i)->name = descr->field(i)->name();
452 SizeSummarizer::SizeSummarizer(
const gpb::Descriptor *descr)
453 : trie_(FillTrie(descr)) {}
// Adds the size of every field listed for `msg` to the matching child of
// `trie` (looked up by the field's index within its message type).
//
// Message-typed fields are measured recursively (summed over all elements when
// repeated); all other fields are measured with GetSize().
// NOTE(review): the declarations of the accumulators and the return statement
// are not visible in this excerpt; presumably the function returns the total
// size of `msg` - confirm.
455 static size_t AddSizesImpl(
const gpb::Message &msg,
456 SizeSummarizer::Trie *trie) {
458 for (
const auto &field : ListFields(msg)) {
460 auto subtrie = trie->Get(field->index());
461 if (field->type() == gpb::FieldDescriptor::TYPE_MESSAGE) {
462 const gpb::Reflection *reflect = msg.GetReflection();
463 if (field->is_repeated()) {
464 size_t field_size = reflect->FieldSize(msg, field);
466 for (
size_t i = 0; i < field_size; ++i) {
467 const gpb::Message &msg2 = reflect->GetRepeatedMessage(msg, field, i);
468 sz += AddSizesImpl(msg2, subtrie);
471 const gpb::Message &msg2 = reflect->GetMessage(msg, field);
472 sz = AddSizesImpl(msg2, subtrie);
475 sz = GetSize(msg, field);
// Accumulate this field's size on its own trie node.
477 subtrie->bytes += sz;
483 void SizeSummarizer::AddSizes(
const gpb::Message &msg) {
484 AddSizesImpl(msg, &trie_);
// Flattens `trie` into `out`, mapping each node's path to its byte count, then
// recurses into all children with the node's own path as their prefix.
//
// trie: node to export.
// path: path of the parent node.
// out:  destination map; an entry for the node's path is inserted, and the
//       visible assignment below overwrites its value with trie.bytes.
// NOTE(review): the conditions selecting between the path forms below, and the
// condition guarding the overwrite, are not visible in this excerpt;
// presumably child paths are "<parent>.<name>" and the overwrite applies only
// when the key already existed - confirm.
487 static void GetSizesImpl(
const SizeSummarizer::Trie &trie,
488 const std::string &path,
489 std::map<std::string, size_t> *out) {
490 std::string new_path;
492 if (trie.name.empty())
495 new_path = trie.name;
497 CHECK(!trie.name.empty());
498 new_path = path +
"." + trie.name;
502 auto iter_and_is_new = out->emplace(new_path, trie.bytes);
503 auto iter = iter_and_is_new.first;
504 bool is_new = iter_and_is_new.second;
506 iter->second = trie.bytes;
509 for (
size_t i = 0; i < trie.Size(); ++i)
510 GetSizesImpl(*trie.Get(i), new_path, out);
// Exports the accumulated sizes as a map from field path to byte count, by
// flattening trie_ with GetSizesImpl() starting from an empty path.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably `ret` is returned - confirm.
514 std::map<std::string, size_t> SizeSummarizer::GetSizes()
const {
515 std::map<std::string, size_t> ret;
516 GetSizesImpl(trie_,
"", &ret);
520 void SizeSummarizer::Print(std::ostream *out_p)
const {
521 for (
const auto &name_and_size : this->GetSizes())
522 std::cout << name_and_size.first <<
" - " << name_and_size.second <<
"\n";