// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
||
package licenseclassifier
|
||
|
||
import (
|
||
"bytes"
|
||
"log"
|
||
"os"
|
||
"path/filepath"
|
||
"strings"
|
||
"testing"
|
||
|
||
"github.com/google/licenseclassifier/stringclassifier"
|
||
)
|
||
|
||
var (
|
||
agpl30, agpl30Header, apache20, bsd3, gpl20, ccbync20 string
|
||
classifier *License
|
||
)
|
||
|
||
func TestMain(m *testing.M) {
|
||
a30, err := ReadLicenseFile("AGPL-3.0.txt")
|
||
if err != nil {
|
||
log.Fatalf("error reading contents of AGPL-3.0.txt: %v", err)
|
||
}
|
||
a30h, err := ReadLicenseFile("AGPL-3.0.header.txt")
|
||
if err != nil {
|
||
log.Fatalf("error reading contents of AGPL-3.0.header.txt: %v", err)
|
||
}
|
||
a20, err := ReadLicenseFile("Apache-2.0.txt")
|
||
if err != nil {
|
||
log.Fatalf("error reading contents of Apache-2.0.txt: %v", err)
|
||
}
|
||
b3, err := ReadLicenseFile("BSD-3-Clause.txt")
|
||
if err != nil {
|
||
log.Fatalf("error reading contents of BSD-3-Clause.txt: %v", err)
|
||
}
|
||
g2, err := ReadLicenseFile("GPL-2.0.txt")
|
||
if err != nil {
|
||
log.Fatalf("error reading contents of GPL-2.0.txt: %v", err)
|
||
}
|
||
cc20, err := ReadLicenseFile("CC-BY-NC-2.0.txt")
|
||
if err != nil {
|
||
log.Fatalf("error reading contents of CC-BY-NC-2.0.txt: %v", err)
|
||
}
|
||
|
||
agpl30 = TrimExtraneousTrailingText(string(a30))
|
||
agpl30Header = TrimExtraneousTrailingText(string(a30h))
|
||
apache20 = TrimExtraneousTrailingText(string(a20))
|
||
bsd3 = TrimExtraneousTrailingText(string(b3))
|
||
gpl20 = TrimExtraneousTrailingText(string(g2))
|
||
ccbync20 = TrimExtraneousTrailingText(string(cc20))
|
||
|
||
classifier, err = New(DefaultConfidenceThreshold)
|
||
if err != nil {
|
||
log.Fatalf("cannot create license classifier: %v", err)
|
||
}
|
||
os.Exit(m.Run())
|
||
}
|
||
|
||
func TestClassifier_NearestMatch(t *testing.T) {
|
||
tests := []struct {
|
||
description string
|
||
filename string
|
||
extraText string
|
||
wantLicense string
|
||
wantConfidence float64
|
||
}{
|
||
{
|
||
description: "AGPL 3.0 license",
|
||
filename: "AGPL-3.0.txt",
|
||
wantLicense: "AGPL-3.0",
|
||
wantConfidence: 1.0,
|
||
},
|
||
{
|
||
description: "Apache 2.0 license",
|
||
filename: "Apache-2.0.txt",
|
||
wantLicense: "Apache-2.0",
|
||
wantConfidence: 1.0,
|
||
},
|
||
{
|
||
description: "GPL 2.0 license",
|
||
filename: "GPL-2.0.txt",
|
||
wantLicense: "GPL-2.0",
|
||
wantConfidence: 1.0,
|
||
},
|
||
{
|
||
description: "BSD 3 Clause license with extra text",
|
||
filename: "BSD-3-Clause.txt",
|
||
extraText: "New BSD License\nCopyright © 1998 Yoyodyne, Inc.\n",
|
||
wantLicense: "BSD-3-Clause",
|
||
wantConfidence: 0.94,
|
||
},
|
||
}
|
||
|
||
classifier.Threshold = DefaultConfidenceThreshold
|
||
for _, tt := range tests {
|
||
content, err := ReadLicenseFile(tt.filename)
|
||
if err != nil {
|
||
t.Errorf("error reading contents of %q license: %v", tt.wantLicense, err)
|
||
continue
|
||
}
|
||
|
||
m := classifier.NearestMatch(tt.extraText + TrimExtraneousTrailingText(string(content)))
|
||
if got, want := m.Name, tt.wantLicense; got != want {
|
||
t.Errorf("NearestMatch(%q) = %q, want %q", tt.description, got, want)
|
||
}
|
||
if got, want := m.Confidence, tt.wantConfidence; got < want {
|
||
t.Errorf("NearestMatch(%q) = %v, want %v", tt.description, got, want)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestClassifier_MultipleMatch(t *testing.T) {
|
||
tests := []struct {
|
||
description string
|
||
text string
|
||
want stringclassifier.Matches
|
||
}{
|
||
{
|
||
description: "Two licenses",
|
||
text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + apache20 + strings.Repeat("-", 80) + "\n" + bsd3,
|
||
want: stringclassifier.Matches{
|
||
{
|
||
Name: "Apache-2.0",
|
||
Confidence: 1.0,
|
||
},
|
||
{
|
||
Name: "BSD-3-Clause",
|
||
Confidence: 1.0,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
description: "Two licenses: partial match",
|
||
text: "Copyright (c) 2016 Yoyodyne, Inc.\n" +
|
||
string(apache20[:len(apache20)/2-1]) + string(apache20[len(apache20)/2+7:]) + strings.Repeat("-", 80) + "\n" +
|
||
string(bsd3[:len(bsd3)/2]) + "intervening stuff" + string(bsd3[len(bsd3)/2:]),
|
||
want: stringclassifier.Matches{
|
||
{
|
||
Name: "Apache-2.0",
|
||
Confidence: 0.99,
|
||
},
|
||
{
|
||
Name: "BSD-3-Clause",
|
||
Confidence: 0.98,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
description: "Two licenses: one forbidden the other okay",
|
||
text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + apache20 + strings.Repeat("-", 80) + "\n" + ccbync20,
|
||
want: stringclassifier.Matches{
|
||
{
|
||
Name: "Apache-2.0",
|
||
Confidence: 0.99,
|
||
},
|
||
{
|
||
Name: "CC-BY-NC-2.0",
|
||
Confidence: 1.0,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
description: "Two licenses without any space between them.",
|
||
text: apache20 + "." + bsd3,
|
||
want: stringclassifier.Matches{
|
||
{
|
||
Name: "Apache-2.0",
|
||
Confidence: 1.0,
|
||
},
|
||
{
|
||
Name: "BSD-3-Clause",
|
||
Confidence: 1.0,
|
||
},
|
||
},
|
||
},
|
||
}
|
||
|
||
classifier.Threshold = 0.95
|
||
defer func() {
|
||
classifier.Threshold = DefaultConfidenceThreshold
|
||
}()
|
||
for _, tt := range tests {
|
||
m := classifier.MultipleMatch(tt.text, false)
|
||
if len(m) != len(tt.want) {
|
||
t.Fatalf("MultipleMatch(%q) number matches: %v, want %v", tt.description, len(m), len(tt.want))
|
||
continue
|
||
}
|
||
|
||
for i := 0; i < len(m); i++ {
|
||
w := tt.want[i]
|
||
if got, want := m[i].Name, w.Name; got != want {
|
||
t.Errorf("MultipleMatch(%q) = %q, want %q", tt.description, got, want)
|
||
}
|
||
if got, want := m[i].Confidence, w.Confidence; got < want {
|
||
t.Errorf("MultipleMatch(%q) = %v, want %v", tt.description, got, want)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestClassifier_MultipleMatch_Headers(t *testing.T) {
|
||
tests := []struct {
|
||
description string
|
||
text string
|
||
want stringclassifier.Matches
|
||
}{
|
||
{
|
||
description: "AGPL-3.0 header",
|
||
text: "Copyright (c) 2016 Yoyodyne, Inc.\n" + agpl30Header,
|
||
want: stringclassifier.Matches{
|
||
{
|
||
Name: "AGPL-3.0",
|
||
Confidence: 1.0,
|
||
Offset: 0,
|
||
},
|
||
},
|
||
},
|
||
{
|
||
description: "Modified LGPL-2.1 header",
|
||
text: `Common Widget code.
|
||
|
||
Copyright (C) 2013-2015 Yoyodyne, Inc.
|
||
|
||
This library is free software; you can redistribute it and/or
|
||
modify it under the terms of the GNU Lesser General Public
|
||
License as published by the Free Software Foundation; either
|
||
version 2.1 of the License, or (at your option) any later version (but not!).
|
||
|
||
This library is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
Lesser General Public License for more details.
|
||
|
||
You should have received a copy of the GNU Lesser General Public
|
||
License along with this library; if not, write to the Free Software
|
||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||
`,
|
||
want: stringclassifier.Matches{
|
||
{
|
||
Name: "LGPL-2.1",
|
||
Confidence: 0.97,
|
||
Offset: 197,
|
||
},
|
||
},
|
||
},
|
||
}
|
||
|
||
classifier.Threshold = 0.90
|
||
defer func() {
|
||
classifier.Threshold = DefaultConfidenceThreshold
|
||
}()
|
||
for _, tt := range tests {
|
||
m := classifier.MultipleMatch(tt.text, true)
|
||
if len(m) != len(tt.want) {
|
||
t.Errorf("MultipleMatch(%q) number matches: %v, want %v", tt.description, len(m), len(tt.want))
|
||
continue
|
||
}
|
||
|
||
for i := 0; i < len(m); i++ {
|
||
w := tt.want[i]
|
||
if got, want := m[i].Name, w.Name; got != want {
|
||
t.Errorf("MultipleMatch(%q) = %q, want %q", tt.description, got, want)
|
||
}
|
||
if got, want := m[i].Confidence, w.Confidence; got < want {
|
||
t.Errorf("MultipleMatch(%q) = %v, want %v", tt.description, got, want)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestClassifier_CopyrightHolder(t *testing.T) {
|
||
tests := []struct {
|
||
copyright string
|
||
want string
|
||
}{
|
||
{
|
||
copyright: "Copyright 2008 Yoyodyne Inc. All Rights Reserved.",
|
||
want: "Yoyodyne Inc.",
|
||
},
|
||
{
|
||
copyright: "Copyright 2010-2016 Yoyodyne, Inc.",
|
||
want: "Yoyodyne, Inc.",
|
||
},
|
||
{
|
||
copyright: "Copyright 2010, 2011, 2012 Yoyodyne, Inc., All rights reserved.",
|
||
want: "Yoyodyne, Inc.",
|
||
},
|
||
{
|
||
copyright: "Copyright (c) 2015 Yoyodyne, Inc. All rights reserved.",
|
||
want: "Yoyodyne, Inc.",
|
||
},
|
||
{
|
||
copyright: "Copyright © 1998 by Yoyodyne, Inc., San Narciso, CA, US.",
|
||
want: "Yoyodyne, Inc., San Narciso, CA, US",
|
||
},
|
||
{
|
||
copyright: "Copyright (c) 2015 The Algonquin Round Table. All rights reserved.",
|
||
want: "The Algonquin Round Table",
|
||
},
|
||
{
|
||
copyright: "Copyright 2016, The Android Open Source Project",
|
||
want: "The Android Open Source Project",
|
||
},
|
||
{
|
||
copyright: `---------------------------------------------------------
|
||
foo.c:
|
||
Copyright 2016, The Android Open Source Project
|
||
`,
|
||
want: "The Android Open Source Project",
|
||
},
|
||
}
|
||
|
||
for _, tt := range tests {
|
||
got := CopyrightHolder(tt.copyright)
|
||
if got != tt.want {
|
||
t.Errorf("CopyrightHolder(%q) = %q, want %q", tt.copyright, got, tt.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestClassifier_WithinConfidenceThreshold(t *testing.T) {
|
||
tests := []struct {
|
||
description string
|
||
text string
|
||
confDef bool
|
||
conf99 bool
|
||
conf93 bool
|
||
conf5 bool
|
||
}{
|
||
{
|
||
description: "Apache 2.0",
|
||
text: apache20,
|
||
confDef: true,
|
||
conf99: true,
|
||
conf93: true,
|
||
conf5: true,
|
||
},
|
||
{
|
||
description: "GPL 2.0",
|
||
text: gpl20,
|
||
confDef: true,
|
||
conf99: true,
|
||
conf93: true,
|
||
conf5: true,
|
||
},
|
||
{
|
||
description: "BSD 3 Clause license with extra text",
|
||
text: "New BSD License\nCopyright © 1998 Yoyodyne, Inc.\n" + bsd3,
|
||
confDef: true,
|
||
conf99: true,
|
||
conf93: true,
|
||
conf5: true,
|
||
},
|
||
{
|
||
description: "Very low confidence",
|
||
text: strings.Repeat("Random text is random, but not a license\n", 40),
|
||
confDef: false,
|
||
conf99: false,
|
||
conf93: false,
|
||
conf5: true,
|
||
},
|
||
}
|
||
|
||
defer func() {
|
||
classifier.Threshold = DefaultConfidenceThreshold
|
||
}()
|
||
for _, tt := range tests {
|
||
t.Run(tt.description, func(t *testing.T) {
|
||
classifier.Threshold = DefaultConfidenceThreshold
|
||
m := classifier.NearestMatch(tt.text)
|
||
if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.confDef {
|
||
t.Errorf("WithinConfidenceThreshold() at %v returned wrong result; got %v, want %v", classifier.Threshold, got, tt.confDef)
|
||
}
|
||
|
||
classifier.Threshold = 0.99
|
||
m = classifier.NearestMatch(tt.text)
|
||
if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.conf99 {
|
||
t.Errorf("WithinConfidenceThreshold(%q) = %v, want %v", tt.description, got, tt.conf99)
|
||
}
|
||
|
||
classifier.Threshold = 0.93
|
||
m = classifier.NearestMatch(tt.text)
|
||
if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.conf93 {
|
||
t.Errorf("WithinConfidenceThreshold(%q) = %v, want %v", tt.description, got, tt.conf93)
|
||
}
|
||
|
||
classifier.Threshold = 0.05
|
||
m = classifier.NearestMatch(tt.text)
|
||
if got := classifier.WithinConfidenceThreshold(m.Confidence); got != tt.conf5 {
|
||
t.Errorf("WithinConfidenceThreshold(%q) = %v, want %v", tt.description, got, tt.conf5)
|
||
}
|
||
})
|
||
}
|
||
}
|
||
|
||
func TestRemoveIgnorableText(t *testing.T) {
|
||
const want = `Lorem ipsum dolor sit amet, pellentesque wisi tortor duis, amet adipiscing bibendum elit aliquam
|
||
leo. Mattis commodo sed accumsan at in.
|
||
`
|
||
|
||
tests := []struct {
|
||
original string
|
||
want string
|
||
}{
|
||
{"MIT License\n", "\n"},
|
||
{"The MIT License\n", "\n"},
|
||
{"The MIT License (MIT)\n", "\n"},
|
||
{"BSD License\n", "\n"},
|
||
{"New BSD License\n", "\n"},
|
||
{"COPYRIGHT AND PERMISSION NOTICE\n", "\n"},
|
||
{"Copyright (c) 2016, Yoyodyne, Inc.\n", "\n"},
|
||
{"All rights reserved.\n", "\n"},
|
||
{"Some rights reserved.\n", "\n"},
|
||
{"@license\n", "\n"},
|
||
|
||
// Now with wanted texts.
|
||
{
|
||
original: `The MIT License
|
||
|
||
Copyright (c) 2016, Yoyodyne, Inc.
|
||
All rights reserved.
|
||
` + want,
|
||
want: strings.ToLower(want),
|
||
},
|
||
}
|
||
|
||
for _, tt := range tests {
|
||
if got := removeIgnorableTexts(strings.ToLower(tt.original)); got != tt.want {
|
||
t.Errorf("Mismatch(%q) =>\n%s\nwant:\n%s", tt.original, got, tt.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestRemoveShebangLine(t *testing.T) {
|
||
tests := []struct {
|
||
original string
|
||
want string
|
||
}{
|
||
{
|
||
original: "",
|
||
want: "",
|
||
},
|
||
{
|
||
original: "#!/usr/bin/env python -C",
|
||
want: "#!/usr/bin/env python -C",
|
||
},
|
||
{
|
||
original: `#!/usr/bin/env python -C
|
||
# First line of license text.
|
||
# Second line of license text.
|
||
`,
|
||
want: `# First line of license text.
|
||
# Second line of license text.
|
||
`,
|
||
},
|
||
{
|
||
original: `# First line of license text.
|
||
# Second line of license text.
|
||
`,
|
||
want: `# First line of license text.
|
||
# Second line of license text.
|
||
`,
|
||
},
|
||
}
|
||
|
||
for _, tt := range tests {
|
||
got := removeShebangLine(tt.original)
|
||
if got != tt.want {
|
||
t.Errorf("RemoveShebangLine(%q) =>\n%s\nwant:\n%s", tt.original, got, tt.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestRemoveNonWords(t *testing.T) {
|
||
tests := []struct {
|
||
original string
|
||
want string
|
||
}{
|
||
{
|
||
original: `# # Hello
|
||
## World
|
||
`,
|
||
want: ` Hello World `,
|
||
},
|
||
{
|
||
original: ` * This text has a bulleted list:
|
||
* * item 1
|
||
* * item 2`,
|
||
want: ` This text has a bulleted list item 1 item 2`,
|
||
},
|
||
{
|
||
original: `
|
||
|
||
* This text has a bulleted list:
|
||
* * item 1
|
||
* * item 2`,
|
||
want: ` This text has a bulleted list item 1 item 2`,
|
||
},
|
||
{
|
||
original: `// This text has a bulleted list:
|
||
// 1. item 1
|
||
// 2. item 2`,
|
||
want: ` This text has a bulleted list 1 item 1 2 item 2`,
|
||
},
|
||
{
|
||
original: `// «Copyright (c) 1998 Yoyodyne, Inc.»
|
||
// This text has a bulleted list:
|
||
// 1. item 1
|
||
// 2. item 2
|
||
`,
|
||
want: ` «Copyright c 1998 Yoyodyne Inc » This text has a bulleted list 1 item 1 2 item 2 `,
|
||
},
|
||
{
|
||
original: `*
|
||
* This is the first line we want.
|
||
* This is the second line we want.
|
||
* This is the third line we want.
|
||
* This is the last line we want.
|
||
`,
|
||
want: ` This is the first line we want This is the second line we want This is the third line we want This is the last line we want `,
|
||
},
|
||
{
|
||
original: `===---------------------------------------------===
|
||
***
|
||
* This is the first line we want.
|
||
* This is the second line we want.
|
||
* This is the third line we want.
|
||
* This is the last line we want.
|
||
***
|
||
===---------------------------------------------===
|
||
`,
|
||
want: ` This is the first line we want This is the second line we want This is the third line we want This is the last line we want `,
|
||
},
|
||
{
|
||
original: strings.Repeat("-", 80),
|
||
want: " ",
|
||
},
|
||
{
|
||
original: strings.Repeat("=", 80),
|
||
want: " ",
|
||
},
|
||
{
|
||
original: "/*\n",
|
||
want: " ",
|
||
},
|
||
{
|
||
original: "/*\n * precursor text\n */\n",
|
||
want: " precursor text ",
|
||
},
|
||
// Test for b/63540492.
|
||
{
|
||
original: " */\n",
|
||
want: " ",
|
||
},
|
||
{
|
||
original: "",
|
||
want: "",
|
||
},
|
||
}
|
||
|
||
for _, tt := range tests {
|
||
if got := stringclassifier.FlattenWhitespace(RemoveNonWords(tt.original)); got != tt.want {
|
||
t.Errorf("Mismatch(%q) => %v, want %v", tt.original, got, tt.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestNormalizePunctuation(t *testing.T) {
|
||
tests := []struct {
|
||
original string
|
||
want string
|
||
}{
|
||
// Hyphens and dashes.
|
||
{"—", "-"},
|
||
{"-", "-"},
|
||
{"‒", "-"},
|
||
{"–", "-"},
|
||
{"—", "-"},
|
||
|
||
// Quotes.
|
||
{"'", "'"},
|
||
{`"`, "'"},
|
||
{"‘", "'"},
|
||
{"’", "'"},
|
||
{"“", "'"},
|
||
{"”", "'"},
|
||
{" ” ", " ' "},
|
||
|
||
// Backtick.
|
||
{"`", "'"},
|
||
|
||
// Copyright mark.
|
||
{"©", "(c)"},
|
||
|
||
// Hyphen-separated words.
|
||
{"general- purpose, non- compliant", "general-purpose, non-compliant"},
|
||
|
||
// Section.
|
||
{"§", "(s)"},
|
||
{"¤", "(s)"},
|
||
}
|
||
|
||
for _, tt := range tests {
|
||
if got := NormalizePunctuation(tt.original); got != tt.want {
|
||
t.Errorf("Mismatch => %v, want %v", got, tt.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestNormalizeEquivalentWords(t *testing.T) {
|
||
tests := []struct {
|
||
original string
|
||
want string
|
||
}{
|
||
{"acknowledgment", "Acknowledgement"},
|
||
{"ANalogue", "Analog"},
|
||
{"AnAlyse", "Analyze"},
|
||
{"ArtefacT", "Artifact"},
|
||
{"authorisation", "Authorization"},
|
||
{"AuthoriSed", "Authorized"},
|
||
{"CalIbre", "Caliber"},
|
||
{"CanCelled", "Canceled"},
|
||
{"CapitaliSations", "Capitalizations"},
|
||
{"CatalogUe", "Catalog"},
|
||
{"CategoriSe", "Categorize"},
|
||
{"CentRE", "Center"},
|
||
{"EmphasiSed", "Emphasized"},
|
||
{"FavoUr", "Favor"},
|
||
{"FavoUrite", "Favorite"},
|
||
{"FulfiL", "Fulfill"},
|
||
{"FulfiLment", "Fulfillment"},
|
||
{"InitialiSe", "Initialize"},
|
||
{"JudGMent", "Judgement"},
|
||
{"LabelLing", "Labeling"},
|
||
{"LaboUr", "Labor"},
|
||
{"LicenCe", "License"},
|
||
{"MaximiSe", "Maximize"},
|
||
{"ModelLed", "Modeled"},
|
||
{"ModeLling", "Modeling"},
|
||
{"OffenCe", "Offense"},
|
||
{"OptimiSe", "Optimize"},
|
||
{"OrganiSation", "Organization"},
|
||
{"OrganiSe", "Organize"},
|
||
{"PractiSe", "Practice"},
|
||
{"ProgramME", "Program"},
|
||
{"RealiSe", "Realize"},
|
||
{"RecogniSe", "Recognize"},
|
||
{"SignalLing", "Signaling"},
|
||
{"sub-license", "Sublicense"},
|
||
{"sub license", "Sublicense"},
|
||
{"UtiliSation", "Utilization"},
|
||
{"WhilST", "While"},
|
||
{"WilfuL", "Wilfull"},
|
||
{"Non-coMMercial", "Noncommercial"},
|
||
{"Per Cent", "Percent"},
|
||
}
|
||
|
||
for _, tt := range tests {
|
||
if got := NormalizeEquivalentWords(tt.original); got != tt.want {
|
||
t.Errorf("Mismatch => %v, want %v", got, tt.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestTrimExtraneousTrailingText(t *testing.T) {
|
||
tests := []struct {
|
||
original string
|
||
want string
|
||
}{
|
||
{
|
||
original: `12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL
|
||
ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE
|
||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||
SUCH DAMAGES.
|
||
|
||
END OF TERMS AND CONDITIONS
|
||
|
||
How to Apply These Terms to Your New Programs
|
||
|
||
If you develop a new program, and you want it to be of the greatest
|
||
possible use to the public, the best way to achieve this is to make it free
|
||
software which everyone can redistribute and change under these terms.
|
||
`,
|
||
want: `12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL
|
||
ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE
|
||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||
SUCH DAMAGES.
|
||
|
||
END OF TERMS AND CONDITIONS`,
|
||
},
|
||
}
|
||
|
||
for _, tt := range tests {
|
||
if got := TrimExtraneousTrailingText(tt.original); got != tt.want {
|
||
t.Errorf("Mismatch => %q, want %q", got, tt.want)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestCommonLicenseWords(t *testing.T) {
|
||
files, err := ReadLicenseDir()
|
||
if err != nil {
|
||
t.Fatalf("error: cannot read licenses directory: %v", err)
|
||
}
|
||
if files == nil {
|
||
t.Fatal("error: cannot get licenses from license directory")
|
||
}
|
||
|
||
for _, file := range files {
|
||
if filepath.Ext(file.Name()) != ".txt" {
|
||
continue
|
||
}
|
||
text, err := ReadLicenseFile(file.Name())
|
||
if err != nil {
|
||
t.Fatalf("error reading contents of %q: %v", file.Name(), err)
|
||
}
|
||
|
||
if got := classifier.hasCommonLicenseWords(string(text)); !got {
|
||
t.Errorf("Mismatch(%q) => false, want true", file.Name())
|
||
}
|
||
}
|
||
|
||
text := strings.Repeat("Þetta er ekki leyfi.\n", 80)
|
||
if got := classifier.hasCommonLicenseWords(text); got {
|
||
t.Error("Mismatch => true, want false")
|
||
}
|
||
}
|
||
|
||
func TestLicenseMatchQuality(t *testing.T) {
|
||
files, err := ReadLicenseDir()
|
||
if err != nil {
|
||
t.Fatalf("error: cannot read licenses directory: %v", err)
|
||
}
|
||
|
||
classifier.Threshold = 1.0
|
||
defer func() {
|
||
classifier.Threshold = DefaultConfidenceThreshold
|
||
}()
|
||
for _, file := range files {
|
||
if filepath.Ext(file.Name()) != ".txt" {
|
||
continue
|
||
}
|
||
name := strings.TrimSuffix(file.Name(), ".txt")
|
||
|
||
contents, err := ReadLicenseFile(file.Name())
|
||
if err != nil {
|
||
t.Fatalf("error reading contents of %q: %v", file.Name(), err)
|
||
}
|
||
|
||
m := classifier.NearestMatch(TrimExtraneousTrailingText(string(contents)))
|
||
if m == nil {
|
||
t.Errorf("Couldn't match %q", name)
|
||
continue
|
||
}
|
||
|
||
if !classifier.WithinConfidenceThreshold(m.Confidence) {
|
||
t.Errorf("ConfidenceMatch(%q) => %v, want %v", name, m.Confidence, 0.99)
|
||
}
|
||
want := strings.TrimSuffix(name, ".header")
|
||
if want != m.Name {
|
||
t.Errorf("LicenseMatch(%q) => %v, want %v", name, m.Name, want)
|
||
}
|
||
}
|
||
}
|
||
|
||
func BenchmarkClassifier(b *testing.B) {
|
||
contents := apache20[:len(apache20)/2] + "hello" + apache20[len(apache20)/2:]
|
||
|
||
b.ResetTimer()
|
||
for i := 0; i < b.N; i++ {
|
||
classifier, err := New(DefaultConfidenceThreshold)
|
||
if err != nil {
|
||
b.Errorf("Cannot create classifier: %v", err)
|
||
continue
|
||
}
|
||
classifier.NearestMatch(contents)
|
||
}
|
||
}
|
||
|
||
func TestNew(t *testing.T) {
|
||
tests := []struct {
|
||
desc string
|
||
options []OptionFunc
|
||
wantArchive func() []byte
|
||
wantErr bool
|
||
}{
|
||
{
|
||
desc: "no options, use default",
|
||
options: []OptionFunc{},
|
||
wantArchive: nil,
|
||
},
|
||
{
|
||
desc: "specify ForbiddenLicenseArchive",
|
||
options: []OptionFunc{Archive(ForbiddenLicenseArchive)},
|
||
wantArchive: func() []byte {
|
||
b, _ := ReadLicenseFile(ForbiddenLicenseArchive)
|
||
return b
|
||
},
|
||
},
|
||
{
|
||
desc: "file doesn't exist results in error",
|
||
options: []OptionFunc{Archive("doesnotexist")},
|
||
wantArchive: func() []byte { return nil },
|
||
wantErr: true,
|
||
},
|
||
{
|
||
desc: "raw bytes archive",
|
||
options: []OptionFunc{ArchiveBytes([]byte("not a gzipped file"))},
|
||
wantArchive: func() []byte { return []byte("not a gzipped file") },
|
||
wantErr: true,
|
||
},
|
||
{
|
||
desc: "function archive",
|
||
options: []OptionFunc{ArchiveFunc(func() ([]byte, error) {
|
||
return []byte("not a gzipped file"), nil
|
||
})},
|
||
wantArchive: func() []byte { return []byte("not a gzipped file") },
|
||
wantErr: true,
|
||
},
|
||
}
|
||
for _, tt := range tests {
|
||
t.Run(tt.desc, func(t *testing.T) {
|
||
c, err := New(0.5, tt.options...)
|
||
if tt.wantErr != (err != nil) {
|
||
t.Fatalf("unexpected error: %v", err)
|
||
}
|
||
if err == nil {
|
||
if tt.wantArchive == nil {
|
||
if c.archive != nil {
|
||
t.Errorf("wanted default archive, but got specified archive")
|
||
}
|
||
} else {
|
||
got, _ := c.archive()
|
||
want := tt.wantArchive()
|
||
if !bytes.Equal(got, want) {
|
||
t.Errorf("archives did not match; got %d bytes, wanted %d", len(got), len(want))
|
||
}
|
||
}
|
||
}
|
||
})
|
||
}
|
||
|
||
}
|