567 lines
12 KiB
Go
567 lines
12 KiB
Go
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package commentparser
|
|
|
|
import (
|
|
"fmt"
|
|
"reflect"
|
|
"testing"
|
|
|
|
"github.com/google/go-cmp/cmp"
|
|
"github.com/google/licenseclassifier/commentparser/language"
|
|
)
|
|
|
|
// Fixture text that the tests in this file embed into comment sources.
const (
	// singleLineText is the body used for one-line comments.
	singleLineText = "single line text"

	// multilineText is three lines of text, each terminated by a newline,
	// so a comment wrapping it spans four source lines.
	multilineText = "first line of text\n" +
		"second line of text\n" +
		"third line of text\n"
)
|
|
func TestCommentParser_Lex(t *testing.T) {
|
|
tests := []struct {
|
|
description string
|
|
lang language.Language
|
|
source string
|
|
want Comments
|
|
}{
|
|
{
|
|
description: "BCPL Single Line Comments",
|
|
lang: language.Go,
|
|
source: fmt.Sprintf("//%s\n", singleLineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: singleLineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Go Comment With Multiline String",
|
|
lang: language.Go,
|
|
source: fmt.Sprintf("var a = `A\nmultiline\\x20\nstring`\n//%s\n", singleLineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 4,
|
|
EndLine: 4,
|
|
Text: singleLineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Python Multiline String",
|
|
lang: language.Python,
|
|
source: fmt.Sprintf("#%s\n\n\n\nx = '''this is a multiline\nstring'''", singleLineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: singleLineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Python module-level Docstring #1",
|
|
lang: language.Python,
|
|
source: fmt.Sprintf("'''%s'''\nimport foo", multilineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 4,
|
|
Text: multilineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Python module-level Docstring #2",
|
|
lang: language.Python,
|
|
source: fmt.Sprintf("#!/usr/bin/python\n'''%s'''\nimport foo", multilineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: "!/usr/bin/python",
|
|
},
|
|
{
|
|
StartLine: 2,
|
|
EndLine: 5,
|
|
Text: multilineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
// Only include docstrings that start at the beginning of a line
|
|
description: "Python module-level Docstring #3",
|
|
lang: language.Python,
|
|
source: "'''zero1'''\n '''one'''\n '''two'''\n'''zero2'''",
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: "zero1",
|
|
},
|
|
{
|
|
StartLine: 4,
|
|
EndLine: 4,
|
|
Text: "zero2",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "TR Command String",
|
|
lang: language.Python,
|
|
source: fmt.Sprintf(`#%s
|
|
AUTH= \
|
|
| tr '"\n' \
|
|
| base64 -w
|
|
`, singleLineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: singleLineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Lisp Single Line Comments",
|
|
lang: language.Clojure,
|
|
source: fmt.Sprintf(";%s\n", singleLineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: singleLineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Shell Single Line Comments",
|
|
lang: language.Shell,
|
|
source: fmt.Sprintf("#%s\n", singleLineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: singleLineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "BCPL Multiline Comments",
|
|
lang: language.C,
|
|
source: fmt.Sprintf("/*%s*/\n", multilineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 4,
|
|
Text: multilineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "BCPL Multiline Comments no terminating newline",
|
|
lang: language.C,
|
|
source: fmt.Sprintf("/*%s*/", multilineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 4,
|
|
Text: multilineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Nested Multiline Comments",
|
|
lang: language.Swift,
|
|
source: "/*a /*\n nested\n*/\n comment\n*/\n",
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 5,
|
|
Text: "a /*\n nested\n*/\n comment\n",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Ruby Multiline Comments",
|
|
lang: language.Ruby,
|
|
source: fmt.Sprintf("=begin\n%s=end\n", multilineText),
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 5,
|
|
Text: "\n" + multilineText,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Multiple Single Line Comments",
|
|
lang: language.Shell,
|
|
source: `# First line
|
|
# Second line
|
|
# Third line
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: " First line",
|
|
},
|
|
{
|
|
StartLine: 2,
|
|
EndLine: 2,
|
|
Text: " Second line",
|
|
},
|
|
{
|
|
StartLine: 3,
|
|
EndLine: 3,
|
|
Text: " Third line",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Mixed Multiline / Single Line Comments",
|
|
lang: language.C,
|
|
source: `/*
|
|
* The first multiline line.
|
|
* The second multiline line.
|
|
*/
|
|
// The first single line comment.
|
|
// The second single line comment.
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 4,
|
|
Text: `
|
|
* The first multiline line.
|
|
* The second multiline line.
|
|
`,
|
|
},
|
|
{
|
|
StartLine: 5,
|
|
EndLine: 5,
|
|
Text: " The first single line comment.",
|
|
},
|
|
{
|
|
StartLine: 6,
|
|
EndLine: 6,
|
|
Text: " The second single line comment.",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Mixed Multiline / Single Line Comments",
|
|
lang: language.C,
|
|
source: `/*
|
|
* The first multiline line.
|
|
* The second multiline line.
|
|
*/
|
|
// The first single line comment.
|
|
// The second single line comment.
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 4,
|
|
Text: `
|
|
* The first multiline line.
|
|
* The second multiline line.
|
|
`,
|
|
},
|
|
{
|
|
StartLine: 5,
|
|
EndLine: 5,
|
|
Text: " The first single line comment.",
|
|
},
|
|
{
|
|
StartLine: 6,
|
|
EndLine: 6,
|
|
Text: " The second single line comment.",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "HTML-like comments and quotes",
|
|
lang: language.HTML,
|
|
source: `# This is an important topic
|
|
I don't want to go on all day here! <-- notice the quote in there!
|
|
<!-- Well, maybe I do... -->
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 3,
|
|
EndLine: 3,
|
|
Text: " Well, maybe I do... ",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "JavaScript regex",
|
|
lang: language.JavaScript,
|
|
source: `var re = /hello"world/;
|
|
// the comment
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 2,
|
|
EndLine: 2,
|
|
Text: " the comment",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Perl regex",
|
|
lang: language.Perl,
|
|
source: `if (/hello"world/) {
|
|
# the comment
|
|
print "Yo!"
|
|
}
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 2,
|
|
EndLine: 2,
|
|
Text: " the comment",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "SQL using MySQL-style comments",
|
|
lang: language.SQL,
|
|
source: `/*
|
|
* The first multiline line.
|
|
* The second multiline line.
|
|
*/
|
|
# The first single line comment.
|
|
# The second single line comment.
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 4,
|
|
Text: `
|
|
* The first multiline line.
|
|
* The second multiline line.
|
|
`,
|
|
},
|
|
{
|
|
StartLine: 5,
|
|
EndLine: 5,
|
|
Text: " The first single line comment.",
|
|
},
|
|
{
|
|
StartLine: 6,
|
|
EndLine: 6,
|
|
Text: " The second single line comment.",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "SQL using MySQL-style comments",
|
|
lang: language.SQL,
|
|
source: `-- The first single line comment.
|
|
/*
|
|
* The first multiline line.
|
|
* The second multiline line.
|
|
*/
|
|
-- The second single line comment.
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: " The first single line comment.",
|
|
},
|
|
{
|
|
StartLine: 2,
|
|
EndLine: 5,
|
|
Text: `
|
|
* The first multiline line.
|
|
* The second multiline line.
|
|
`,
|
|
},
|
|
{
|
|
StartLine: 6,
|
|
EndLine: 6,
|
|
Text: " The second single line comment.",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Matlab language - Single Line Comments",
|
|
lang: language.ObjectiveC, // Matlab has same extension as Objective-C.
|
|
source: `% Copyright 2017 Yoyodyne Inc.
|
|
|
|
clear;
|
|
close all;
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 1,
|
|
Text: " Copyright 2017 Yoyodyne Inc.",
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Matlab language - Multi-Line Comments",
|
|
lang: language.ObjectiveC, // Matlab has same extension as Objective-C.
|
|
source: `%{ Multiline comment start.
|
|
Second line of multiline comment.
|
|
%}
|
|
|
|
clear;
|
|
close all;
|
|
`,
|
|
want: []*Comment{
|
|
{
|
|
StartLine: 1,
|
|
EndLine: 3,
|
|
Text: ` Multiline comment start.
|
|
Second line of multiline comment.
|
|
`,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
got := Parse([]byte(tt.source), tt.lang)
|
|
if !cmp.Equal(got, tt.want) {
|
|
t.Errorf("Mismatch(%q) = %+v, want %+v, diff=%v", tt.description, got, tt.want, cmp.Diff(got, tt.want))
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestCommentParser_ChunkIterator(t *testing.T) {
|
|
tests := []struct {
|
|
description string
|
|
comments Comments
|
|
want []Comments
|
|
}{
|
|
{
|
|
description: "Empty Comments",
|
|
comments: Comments{},
|
|
want: nil,
|
|
},
|
|
{
|
|
description: "Single Line Comment Chunk",
|
|
comments: Comments{
|
|
{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
|
|
{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
|
|
},
|
|
want: []Comments{{
|
|
{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
|
|
{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
|
|
}},
|
|
},
|
|
{
|
|
description: "Multiline Comment Chunk",
|
|
comments: Comments{{
|
|
StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3",
|
|
}},
|
|
want: []Comments{{{
|
|
StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3",
|
|
}}},
|
|
},
|
|
{
|
|
description: "Multiple Single Line Comment Chunks",
|
|
comments: Comments{
|
|
{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
|
|
{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
|
|
{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
|
|
{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
|
|
},
|
|
want: []Comments{
|
|
{
|
|
{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
|
|
{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
|
|
},
|
|
{
|
|
{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
|
|
{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Multiline Comment Chunk",
|
|
comments: Comments{
|
|
{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
|
|
{StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"},
|
|
},
|
|
want: []Comments{
|
|
{{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}},
|
|
{{StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"}},
|
|
},
|
|
},
|
|
{
|
|
description: "Multiline and Single Line Comment Chunks",
|
|
comments: Comments{
|
|
{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
|
|
{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
|
|
{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
|
|
},
|
|
want: []Comments{
|
|
{
|
|
{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
|
|
},
|
|
{
|
|
{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
|
|
{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
|
|
},
|
|
},
|
|
},
|
|
{
|
|
description: "Mixed Multiline / Single Line Comments",
|
|
comments: []*Comment{
|
|
{StartLine: 1, EndLine: 1, Text: " The first single line comment."},
|
|
{StartLine: 2, EndLine: 2, Text: " The second single line comment."},
|
|
{StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"},
|
|
},
|
|
want: []Comments{
|
|
{
|
|
{StartLine: 1, EndLine: 1, Text: " The first single line comment."},
|
|
{StartLine: 2, EndLine: 2, Text: " The second single line comment."},
|
|
},
|
|
{
|
|
{StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
i := 0
|
|
for got := range tt.comments.ChunkIterator() {
|
|
if i >= len(tt.want) {
|
|
t.Errorf("Mismatch(%q) more comment chunks than expected = %v, want %v",
|
|
tt.description, i+1, len(tt.want))
|
|
break
|
|
}
|
|
if !reflect.DeepEqual(got, tt.want[i]) {
|
|
t.Errorf("Mismatch(%q) = %+v, want %+v", tt.description, got, tt.want[i])
|
|
}
|
|
i++
|
|
}
|
|
if i != len(tt.want) {
|
|
t.Errorf("Mismatch(%q) not enough comment chunks = %v, want %v",
|
|
tt.description, i, len(tt.want))
|
|
}
|
|
}
|
|
}
|