#!/usr/bin/env python3
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Script for generating .proto and a conversion .cc file for a templated library
based JavaScript parser fuzzer.
"""

import sys

|
def ParseWord(word_string):
  """Parse one dictionary entry into literal strings and int placeholders.

  Every part of the word is either a string surrounded by "" or a
  placeholder $<int> naming an inner-token index.

  Args:
    word_string: one line of the dictionary, e.g. '"for" "(" $0 ")"'.

  Returns:
    A list whose elements are str (literal token text) or int (placeholder
    index), in the order they appear. Empty for a blank line.

  Raises:
    ValueError: if a part is neither a quoted string nor a $<int>
      placeholder (also raised by str.index/int on malformed parts).
  """
  word_string = word_string.strip()

  parts = []
  while word_string:
    if word_string[0] == '"':
      # Quoted literal: everything up to the closing quote.
      end_ix = 1 + word_string[1:].index('"')
      parts.append(word_string[1:end_ix])
      word_string = word_string[(end_ix + 1):]
    elif word_string[0] == '$':
      # Placeholder: $<int>, delimited by the next space or end of string.
      if ' ' in word_string:
        end_ix = word_string.index(' ')
      else:
        end_ix = len(word_string)
      parts.append(int(word_string[1:end_ix]))
      word_string = word_string[end_ix:]
    else:
      # An assert here would be stripped under `python -O`; fail loudly
      # with a descriptive error instead.
      raise ValueError('Unparseable dictionary entry: %r' % word_string)
    word_string = word_string.lstrip()
  return parts
def GenerateProtoContents(words):
  """Generate the enum body of the .proto Token message.

  Only the number of words matters: each word gets one enum entry whose
  name and value are both derived from its index.

  Args:
    words: list of parsed dictionary words (as returned by ReadDictionary).

  Returns:
    A string with one ' token_value_<ix> = <ix>;\n' line per word.
  """
  # ''.join avoids the quadratic behavior of repeated string +=.
  return ''.join(' token_value_' + str(ix) + ' = ' + str(ix) + ';\n'
                 for ix in range(len(words)))
def GenerateConversionContents(words):
  """Generate the switch-case bodies of the token_to_string conversion .cc.

  For each word, emits a `case <ix>:` that rebuilds the word as a C++
  std::string expression: literal parts become std::string("...") and
  $<int> placeholders become recursive token_to_string calls on the
  corresponding inner token, joined by single spaces.

  Args:
    words: list of parsed dictionary words (lists of str and int parts).

  Returns:
    The generated case statements as a single string.
  """
  contents = ''
  for ix, word in enumerate(words):
    contents += ' case ' + str(ix) + ':\n'
    exprs = []
    for part in word:
      if isinstance(part, str):
        exprs.append('std::string("' + part + '")')
      else:
        exprs.append('token_to_string(token.inner_tokens(' + str(part) +
                     '), depth)')
    # Highest placeholder index referenced by this word, -1 if none
    # (matches the original running-maximum fold seeded with -1).
    max_part = max([-1] + [part for part in word if not isinstance(part, str)])
    if max_part >= 0:
      # Guard against out-of-range inner_tokens() accesses at runtime.
      contents += (' if (token.inner_tokens().size() < ' +
                   str(max_part + 1) + ') return std::string("");\n')
    contents += (' return ' + ' + std::string(" ") + '.join(exprs) + ';\n')
  return contents
def ReadDictionary(filename):
  """Read a fuzzer dictionary file and parse its entries.

  Lines beginning with '#' are comments and are skipped; lines that parse
  to nothing (e.g. blank lines) are dropped.

  Args:
    filename: path of the dictionary file to read.

  Returns:
    A list of parsed words, each a list of str / int parts from ParseWord.
  """
  words = []
  with open(filename) as input_file:
    for line in input_file:
      if line.startswith('#'):
        continue
      parsed = ParseWord(line)
      if parsed:
        words.append(parsed)
  return words
def main(argv):
  """Generate the .proto schema and the conversion .cc from a dictionary.

  Args:
    argv: command-line arguments; argv[1] is the output .proto path,
      argv[2] the output conversion .cc path, and argv[3] the input
      dictionary file path. No arity checking is done — a short argv
      raises IndexError.
  """
  output_proto_file = argv[1]
  output_cc_file = argv[2]
  input_dict_file = argv[3]

  words = ReadDictionary(input_dict_file)

  # Fixed scaffolding around the generated per-word enum values.
  proto_header = ('// Generated by generate_javascript_parser_proto.py.\n'
                  '\n'
                  'syntax = "proto2";\n'
                  'package javascript_parser_proto_fuzzer;\n'
                  '\n'
                  'message Token {\n'
                  ' enum Value {\n')

  proto_footer = (' }\n'
                  ' required Value value = 1;\n'
                  ' repeated Token inner_tokens = 2;\n'
                  '}\n'
                  '\n'
                  'message Source {\n'
                  ' required bool is_module = 1;\n'
                  ' repeated Token tokens = 2;\n'
                  '}\n')

  proto_contents = proto_header + GenerateProtoContents(words) + proto_footer

  with open(output_proto_file, 'w') as f:
    f.write(proto_contents)

  # C++ prologue of token_to_string; the generated switch cases are
  # spliced in between header and footer.
  conversion_header = (
      '// Generated by generate_javascript_parser_proto.py.\n'
      '\n'
      '#include "testing/libfuzzer/fuzzers/'
      'javascript_parser_proto_to_string.h"\n'
      '\n'
      '// Bound calls to token_to_string to prevent memory usage from growing\n'
      '// too much.\n'
      'const int kMaxRecursiveDepth = 9;\n'
      '\n'
      'std::string token_to_string(\n'
      ' const javascript_parser_proto_fuzzer::Token& token, int depth)'
      ' {\n'
      ' if (++depth == kMaxRecursiveDepth) return std::string("");\n'
      ' switch(token.value()) {\n')

  conversion_footer = (' default: break;\n'
                       ' }\n'
                       ' return std::string("");\n'
                       '}\n')

  conversion_contents = (conversion_header + GenerateConversionContents(words)
                         + conversion_footer)

  with open(output_cc_file, 'w') as f:
    f.write(conversion_contents)
# Script entry point: forward the raw command line to main().
if __name__ == "__main__":
  main(sys.argv)