// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file contains the definition of the Url protobuf used in the
// url_parse_proto_fuzzer that is meant to serve as an example for future
// Chromium fuzzers that use libprotobuf-mutator. For this fuzzer, we will
// consider the format of a URL to be
// [scheme:][//[user[:password]@]host[:port]][/path][?query][#fragment]. There
// may be some URLs Chromium treats as valid that this syntax does not capture.
// However, we will ignore them for the sake of simplicity. It is recommended to
// read this file in conjunction with Convert() in url_proto_converter.cc as
// logic in this function is sometimes used to ensure that the Url Protocol
// Buffer obeys the syntax we have defined for URLs. Though reading it is
// completely unnecessary for understanding this fuzzer, we have roughly
// followed RFC 3986 (https://tools.ietf.org/html/rfc3986) which defines the
// syntax of URIs (which URLs are a subset of).

// proto2 is used because this file relies on the `required` label and on
// explicit field defaults.
syntax = "proto2";

// All definitions in this file live in the url_proto package.
package url_proto;

// Here we define the format for a Url Protocol Buffer. This will be passed to
// our fuzzer function.
message Url {
  // If there is a scheme, then it must be followed by a colon. A scheme is in
  // practice not required in a URL. Therefore, we will define the scheme as
  // optional but ensure it is followed by a colon in our conversion code if it
  // is included.
  optional string scheme = 1;

  // The kinds of slash that can appear in a URL. This enum is used both for
  // the slashes that precede the host (see slashes) and for the separator
  // placed before path segments (see path_separator).
  enum Slash {
    NONE = 0;      // Separate path segments using ""
    FORWARD = 1;   // Separate path segments using "/"
    BACKWARD = 2;  // Separate path segments using "\"
  }

  // The syntax rules of the two slashes that precede the host in a URL are
  // surprisingly complex. They are not required, even if a scheme is included
  // (http:example.com is treated as valid), and are valid even if a scheme is
  // not included (//example.com is treated as file:///example.com). They can
  // even be backslashes (http:\\example.com and http:\/example.com are both
  // valid) and there can be any number of them (http:/example.com and
  // http://////example.com are both valid).
  // We will therefore define slashes as a list of enum values (repeated
  // Slash). In our conversion code, this will be read to append the
  // appropriate kind and appropriate number of slashes to the URL.
  repeated Slash slashes = 2 [packed = true];

  // The [user:password@] part of the URL shown above is called the userinfo.
  // Userinfo is not mandatory, but if it is included in a URL, then it must
  // contain a string called user. There is another optional field in userinfo
  // called the password. If a password is included, the user must be
  // separated from it by ":". In either case, the userinfo must be separated
  // from the host by "@". A URL must have a host if it has a userinfo. These
  // requirements will be ensured by the conversion code.
  message Userinfo {
    // Mandatory whenever a Userinfo is present.
    required string user = 1;
    // Optional; separated from the user by ":" when included.
    optional string password = 2;
  }
  optional Userinfo userinfo = 3;

  // Hosts, like most else in our Url definition, are optional (there are URLs
  // such as data URLs that do not have hosts).
  optional string host = 4;

  // Ports are 16-bit unsigned integers (0 to 2^16 - 1). The closest type to
  // this in the proto2 format is uint32. Also, if a port number is specified
  // it must be preceded by a colon (consider "google.com80": 80 will be
  // interpreted as part of the host). The conversion code will ensure this is
  // the case.
  optional uint32 port = 5;

  // The rules for the path are somewhat complex. A path is not required,
  // however if it follows a port or host, it must start with "/" according
  // to the RFC, though Chromium accepts "\" as it converts all backslashes to
  // slashes. It does not need to start with "/" if there is no host (in data
  // URLs for example). Thus we will define path as a repeated string where
  // each member contains a segment of the path and will be preceded by the
  // path_separator. The one exception to this is the first segment: if
  // path_separator == NONE and there is both a non-empty path and a host,
  // then the first segment will be preceded by "/".
  repeated string path = 6;
  // NOTE(review): this field is both required and given an explicit default;
  // the default is only observable on messages where the field is unset (i.e.
  // not fully initialized). Confirm this combination is intended.
  required Slash path_separator = 7 [default = FORWARD];

  // A query must be preceded by "?". This will be ensured in the conversion
  // code. Queries can have many components which the converter will separate
  // using "&", as is the convention.
  repeated string query = 8;

  // A fragment must be preceded by "#". This will be ensured in the
  // conversion code.
  optional string fragment = 9;
}