574 lines
18 KiB
Go
574 lines
18 KiB
Go
|
|
// Copyright 2017 Google Inc.
|
||
|
|
//
|
||
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
|
// you may not use this file except in compliance with the License.
|
||
|
|
// You may obtain a copy of the License at
|
||
|
|
//
|
||
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
//
|
||
|
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
|
// See the License for the specific language governing permissions and
|
||
|
|
// limitations under the License.
|
||
|
|
package searchset
|
||
|
|
|
||
|
|
import (
|
||
|
|
"reflect"
|
||
|
|
"testing"
|
||
|
|
|
||
|
|
"github.com/google/licenseclassifier/stringclassifier/searchset/tokenizer"
|
||
|
|
)
|
||
|
|
|
||
|
|
const (
|
||
|
|
shortPostmodernThesis = "In the works of Joyce, a predominant concept is the concept of semioticist"
|
||
|
|
postmodernThesis = `1. Joyce and neotextual modernist theory
|
||
|
|
|
||
|
|
In the works of Joyce, a predominant concept is the concept of semioticist
|
||
|
|
culture. The without/within distinction intrinsic to Finnegan's Wake emerges
|
||
|
|
again in Ulysses, although in a more neomodern sense.
|
||
|
|
`
|
||
|
|
)
|
||
|
|
|
||
|
|
func TestSearchSet_New(t *testing.T) {
|
||
|
|
tests := []struct {
|
||
|
|
description string
|
||
|
|
text string
|
||
|
|
granularity int
|
||
|
|
want *SearchSet
|
||
|
|
}{
|
||
|
|
{
|
||
|
|
description: "Empty string",
|
||
|
|
text: "",
|
||
|
|
granularity: DefaultGranularity,
|
||
|
|
want: &SearchSet{
|
||
|
|
Tokens: nil,
|
||
|
|
Hashes: make(tokenizer.Hash),
|
||
|
|
Checksums: nil,
|
||
|
|
ChecksumRanges: nil,
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Small string",
|
||
|
|
text: "Hello world",
|
||
|
|
granularity: 4,
|
||
|
|
want: &SearchSet{
|
||
|
|
Tokens: tokenizer.Tokens{
|
||
|
|
{Text: "Hello", Offset: 0},
|
||
|
|
{Text: "world", Offset: 6},
|
||
|
|
},
|
||
|
|
Hashes: tokenizer.Hash{2346098258: tokenizer.TokenRanges{{Start: 0, End: 2}}},
|
||
|
|
Checksums: []uint32{2346098258},
|
||
|
|
ChecksumRanges: tokenizer.TokenRanges{{Start: 0, End: 2}},
|
||
|
|
nodes: []*node{{2346098258, &tokenizer.TokenRange{Start: 0, End: 2}}},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
}
|
||
|
|
|
||
|
|
for _, tt := range tests {
|
||
|
|
if got := New(tt.text, tt.granularity); !reflect.DeepEqual(got, tt.want) {
|
||
|
|
t.Errorf("New(%q) = %+v, want %+v", tt.description, got, tt.want)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestSearchSet_NodeConstruction(t *testing.T) {
|
||
|
|
s := New(shortPostmodernThesis, DefaultGranularity)
|
||
|
|
want := []string{
|
||
|
|
"[0:3]", "[1:4]", "[2:5]", "[3:6]", "[4:7]", "[5:8]", "[6:9]",
|
||
|
|
"[7:10]", "[8:11]", "[9:12]", "[10:13]", "[11:14]",
|
||
|
|
}
|
||
|
|
|
||
|
|
if len(s.nodes) != len(want) {
|
||
|
|
t.Errorf("Number of nodes %v, want %v", len(s.nodes), len(want))
|
||
|
|
return
|
||
|
|
}
|
||
|
|
|
||
|
|
for i := 0; i < len(s.nodes); i++ {
|
||
|
|
if got := s.nodes[i].String(); got != want[i] {
|
||
|
|
t.Errorf("Nodes = got:\n%s\nwant:\n%s", got, want[i])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestSearchSet_CoalesceTokenRanges(t *testing.T) {
|
||
|
|
tests := []struct {
|
||
|
|
description string
|
||
|
|
mr MatchRanges
|
||
|
|
want MatchRanges
|
||
|
|
}{
|
||
|
|
{
|
||
|
|
description: "Non-overlapping Ranges",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 27, TargetStart: 0, TargetEnd: 27},
|
||
|
|
{SrcStart: 37, SrcEnd: 927, TargetStart: 37, TargetEnd: 927},
|
||
|
|
},
|
||
|
|
want: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 27, TargetStart: 0, TargetEnd: 27},
|
||
|
|
{SrcStart: 37, SrcEnd: 927, TargetStart: 37, TargetEnd: 927},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Identical Ranges",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 37, TargetStart: 0, TargetEnd: 37},
|
||
|
|
{SrcStart: 0, SrcEnd: 37, TargetStart: 0, TargetEnd: 37},
|
||
|
|
},
|
||
|
|
want: MatchRanges{{SrcStart: 0, SrcEnd: 37, TargetStart: 0, TargetEnd: 37}},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Sequential Ranges",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 37, TargetStart: 0, TargetEnd: 37},
|
||
|
|
{SrcStart: 37, SrcEnd: 927, TargetStart: 37, TargetEnd: 927},
|
||
|
|
},
|
||
|
|
want: MatchRanges{{SrcStart: 0, SrcEnd: 927, TargetStart: 0, TargetEnd: 927}},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Overlapping Ranges - Same Start",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 37, TargetStart: 0, TargetEnd: 37},
|
||
|
|
{SrcStart: 0, SrcEnd: 927, TargetStart: 0, TargetEnd: 927},
|
||
|
|
},
|
||
|
|
want: MatchRanges{{SrcStart: 0, SrcEnd: 927, TargetStart: 0, TargetEnd: 927}},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Overlapping Ranges - Different Start",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 37, TargetStart: 0, TargetEnd: 37},
|
||
|
|
{SrcStart: 27, SrcEnd: 927, TargetStart: 27, TargetEnd: 927},
|
||
|
|
},
|
||
|
|
want: MatchRanges{{SrcStart: 0, SrcEnd: 927, TargetStart: 0, TargetEnd: 927}},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Overlapping Ranges - Same End",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 37, TargetStart: 0, TargetEnd: 37},
|
||
|
|
{SrcStart: 27, SrcEnd: 37, TargetStart: 27, TargetEnd: 37},
|
||
|
|
},
|
||
|
|
want: MatchRanges{{SrcStart: 0, SrcEnd: 37, TargetStart: 0, TargetEnd: 37}},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Completely Overlapping Ranges",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 42, TargetStart: 0, TargetEnd: 42},
|
||
|
|
{SrcStart: 27, SrcEnd: 37, TargetStart: 27, TargetEnd: 37},
|
||
|
|
},
|
||
|
|
want: MatchRanges{{SrcStart: 0, SrcEnd: 42, TargetStart: 0, TargetEnd: 42}},
|
||
|
|
},
|
||
|
|
}
|
||
|
|
|
||
|
|
for _, tt := range tests {
|
||
|
|
got := coalesceMatchRanges(tt.mr)
|
||
|
|
if !reflect.DeepEqual(got, tt.want) {
|
||
|
|
t.Errorf("CoalesceTokenRanges(%q) = %+v, want %+v", tt.description, got, tt.want)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestSearchSet_FindPotentialMatches(t *testing.T) {
|
||
|
|
known := New(postmodernThesis, DefaultGranularity)
|
||
|
|
|
||
|
|
size := len(postmodernThesis)
|
||
|
|
modified := "hello world "
|
||
|
|
modified += postmodernThesis[:size/3] + " hello world "
|
||
|
|
modified += postmodernThesis[size/3 : 2*size/3-4]
|
||
|
|
modified += postmodernThesis[2*size/3+7:]
|
||
|
|
unknown := New(modified, DefaultGranularity)
|
||
|
|
|
||
|
|
want := []MatchRanges{{
|
||
|
|
{SrcStart: 0, SrcEnd: 15, TargetStart: 2, TargetEnd: 17},
|
||
|
|
{SrcStart: 16, SrcEnd: 28, TargetStart: 21, TargetEnd: 33},
|
||
|
|
{SrcStart: 31, SrcEnd: 46, TargetStart: 34, TargetEnd: 49},
|
||
|
|
}}
|
||
|
|
|
||
|
|
got := FindPotentialMatches(known, unknown)
|
||
|
|
if len(got) != len(want) {
|
||
|
|
t.Errorf("Number of matches %v, want %v", len(got), len(want))
|
||
|
|
}
|
||
|
|
if !reflect.DeepEqual(got, want) {
|
||
|
|
t.Errorf("Offsets = %+v, want %+v", got, want)
|
||
|
|
}
|
||
|
|
|
||
|
|
known = New(`again in Ulysses, although in a more neomodern sense.
|
||
|
|
culture. The without/within distinction intrinsic to Finnegan's Wake emerges
|
||
|
|
`, DefaultGranularity)
|
||
|
|
|
||
|
|
want = []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 11, SrcEnd: 18, TargetStart: 26, TargetEnd: 33},
|
||
|
|
{SrcStart: 21, SrcEnd: 25, TargetStart: 34, TargetEnd: 38},
|
||
|
|
},
|
||
|
|
{{SrcStart: 0, SrcEnd: 11, TargetStart: 38, TargetEnd: 49}},
|
||
|
|
}
|
||
|
|
|
||
|
|
got = FindPotentialMatches(known, unknown)
|
||
|
|
if len(got) != len(want) {
|
||
|
|
t.Errorf("Number of matches %v, want %v", len(got), len(want))
|
||
|
|
}
|
||
|
|
if !reflect.DeepEqual(got, want) {
|
||
|
|
t.Errorf("Offsets = %+v, want %+v", got, want)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestSearchSet_GetMatchedRanges(t *testing.T) {
|
||
|
|
const (
|
||
|
|
source = "a b c d e f g c d e h i j"
|
||
|
|
target = "a b c _ _ c d e _ f g h _ c d e _ h i j"
|
||
|
|
)
|
||
|
|
|
||
|
|
src := New(source, DefaultGranularity)
|
||
|
|
tar := New(target, DefaultGranularity)
|
||
|
|
|
||
|
|
want := []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 0, TargetEnd: 3},
|
||
|
|
{SrcStart: 2, SrcEnd: 5, TargetStart: 5, TargetEnd: 8},
|
||
|
|
{SrcStart: 7, SrcEnd: 10, TargetStart: 13, TargetEnd: 16},
|
||
|
|
{SrcStart: 10, SrcEnd: 13, TargetStart: 17, TargetEnd: 20},
|
||
|
|
},
|
||
|
|
}
|
||
|
|
|
||
|
|
got := getMatchedRanges(src, tar)
|
||
|
|
if len(got) != len(want) {
|
||
|
|
t.Errorf("Number of matches %v, want %v", len(got), len(want))
|
||
|
|
}
|
||
|
|
if !reflect.DeepEqual(got, want) {
|
||
|
|
t.Errorf("Match Ranges = %+v, want %+v", got, want)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestSearchSet_TargetMatchedRanges(t *testing.T) {
|
||
|
|
const (
|
||
|
|
source = "a b c d e f g c d e h i j"
|
||
|
|
target = "a b c d e _ _ c d e _ f g h _ c d e _ h i j"
|
||
|
|
)
|
||
|
|
|
||
|
|
src := New(source, DefaultGranularity)
|
||
|
|
tar := New(target, DefaultGranularity)
|
||
|
|
|
||
|
|
want := MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 0, TargetEnd: 3},
|
||
|
|
{SrcStart: 1, SrcEnd: 4, TargetStart: 1, TargetEnd: 4},
|
||
|
|
{SrcStart: 2, SrcEnd: 5, TargetStart: 2, TargetEnd: 5},
|
||
|
|
{SrcStart: 7, SrcEnd: 10, TargetStart: 2, TargetEnd: 5},
|
||
|
|
{SrcStart: 2, SrcEnd: 5, TargetStart: 7, TargetEnd: 10},
|
||
|
|
{SrcStart: 7, SrcEnd: 10, TargetStart: 7, TargetEnd: 10},
|
||
|
|
{SrcStart: 2, SrcEnd: 5, TargetStart: 15, TargetEnd: 18},
|
||
|
|
{SrcStart: 7, SrcEnd: 10, TargetStart: 15, TargetEnd: 18},
|
||
|
|
{SrcStart: 10, SrcEnd: 13, TargetStart: 19, TargetEnd: 22},
|
||
|
|
}
|
||
|
|
|
||
|
|
got := targetMatchedRanges(src, tar)
|
||
|
|
if len(got) != len(want) {
|
||
|
|
t.Errorf("Number of matches %v, want %v", len(got), len(want))
|
||
|
|
}
|
||
|
|
if !reflect.DeepEqual(got, want) {
|
||
|
|
t.Errorf("Match Range = %+v, want %+v", got, want)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestSearchSet_UntangleSourceRanges(t *testing.T) {
|
||
|
|
tests := []struct {
|
||
|
|
description string
|
||
|
|
mr MatchRanges
|
||
|
|
want MatchRanges
|
||
|
|
}{
|
||
|
|
{
|
||
|
|
description: "Single Match - In Order",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 10, TargetEnd: 13},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 15, TargetEnd: 20},
|
||
|
|
},
|
||
|
|
want: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 10, TargetEnd: 13},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 15, TargetEnd: 20},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Single Match - Out of Order",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 10, TargetEnd: 13},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 15, SrcEnd: 20, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 15, TargetEnd: 20},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 24, TargetEnd: 19},
|
||
|
|
{SrcStart: 15, SrcEnd: 20, TargetStart: 24, TargetEnd: 19},
|
||
|
|
{SrcStart: 23, SrcEnd: 29, TargetStart: 30, TargetEnd: 37},
|
||
|
|
},
|
||
|
|
want: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 10, TargetEnd: 13},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 15, TargetEnd: 20},
|
||
|
|
{SrcStart: 15, SrcEnd: 20, TargetStart: 24, TargetEnd: 19},
|
||
|
|
{SrcStart: 23, SrcEnd: 29, TargetStart: 30, TargetEnd: 37},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Multiple Match - In Order",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 10, TargetEnd: 13},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 15, TargetEnd: 20},
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 110, TargetEnd: 113},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 114, TargetEnd: 119},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 115, TargetEnd: 120},
|
||
|
|
},
|
||
|
|
want: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 10, TargetEnd: 13},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 15, TargetEnd: 20},
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 110, TargetEnd: 113},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 114, TargetEnd: 119},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 115, TargetEnd: 120},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Multiple Match - Out of Order",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 10, TargetEnd: 13},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 15, SrcEnd: 20, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 15, TargetEnd: 20},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 24, TargetEnd: 19},
|
||
|
|
{SrcStart: 15, SrcEnd: 20, TargetStart: 24, TargetEnd: 19},
|
||
|
|
{SrcStart: 23, SrcEnd: 29, TargetStart: 30, TargetEnd: 37},
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 110, TargetEnd: 113},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 114, TargetEnd: 119},
|
||
|
|
{SrcStart: 15, SrcEnd: 20, TargetStart: 114, TargetEnd: 119},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 115, TargetEnd: 120},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 124, TargetEnd: 119},
|
||
|
|
{SrcStart: 15, SrcEnd: 20, TargetStart: 124, TargetEnd: 119},
|
||
|
|
{SrcStart: 23, SrcEnd: 29, TargetStart: 130, TargetEnd: 137},
|
||
|
|
},
|
||
|
|
want: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 10, TargetEnd: 13},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 14, TargetEnd: 19},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 15, TargetEnd: 20},
|
||
|
|
{SrcStart: 15, SrcEnd: 20, TargetStart: 24, TargetEnd: 19},
|
||
|
|
{SrcStart: 23, SrcEnd: 29, TargetStart: 30, TargetEnd: 37},
|
||
|
|
{SrcStart: 0, SrcEnd: 3, TargetStart: 110, TargetEnd: 113},
|
||
|
|
{SrcStart: 5, SrcEnd: 10, TargetStart: 114, TargetEnd: 119},
|
||
|
|
{SrcStart: 6, SrcEnd: 11, TargetStart: 115, TargetEnd: 120},
|
||
|
|
{SrcStart: 15, SrcEnd: 20, TargetStart: 124, TargetEnd: 119},
|
||
|
|
{SrcStart: 23, SrcEnd: 29, TargetStart: 130, TargetEnd: 137},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
}
|
||
|
|
|
||
|
|
for _, tt := range tests {
|
||
|
|
got := untangleSourceRanges(tt.mr)
|
||
|
|
if len(got) != len(tt.want) {
|
||
|
|
t.Errorf("Number of matches %v, want %v", len(got), len(tt.want))
|
||
|
|
}
|
||
|
|
if !reflect.DeepEqual(got, tt.want) {
|
||
|
|
t.Errorf("UntangleSourceRanges(%q) = %v, want %v", tt.description, got, tt.want)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestSearchSet_SplitRanges(t *testing.T) {
|
||
|
|
tests := []struct {
|
||
|
|
description string
|
||
|
|
mr MatchRanges
|
||
|
|
want []MatchRanges
|
||
|
|
}{
|
||
|
|
{
|
||
|
|
description: "Single Match Range",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
want: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Two Match Ranges",
|
||
|
|
mr: MatchRanges{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 108, TargetEnd: 115},
|
||
|
|
{SrcStart: 23, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
want: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 108, TargetEnd: 115},
|
||
|
|
{SrcStart: 23, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
}
|
||
|
|
|
||
|
|
for _, tt := range tests {
|
||
|
|
got := splitRanges(tt.mr)
|
||
|
|
if len(got) != len(tt.want) {
|
||
|
|
t.Errorf("Number of matches %v, want %v", len(got), len(tt.want))
|
||
|
|
}
|
||
|
|
if !reflect.DeepEqual(got, tt.want) {
|
||
|
|
t.Errorf("SplitRanges(%q) = %v, want %v", tt.description, got, tt.want)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestSearchSet_MergeConsecutiveRanges(t *testing.T) {
|
||
|
|
tests := []struct {
|
||
|
|
description string
|
||
|
|
mr []MatchRanges
|
||
|
|
want []MatchRanges
|
||
|
|
}{
|
||
|
|
{
|
||
|
|
description: "No Overlap",
|
||
|
|
mr: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 108, TargetEnd: 115},
|
||
|
|
{SrcStart: 23, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
want: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 108, TargetEnd: 115},
|
||
|
|
{SrcStart: 23, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Consecutive Ranges No Overlap",
|
||
|
|
mr: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 35, TargetEnd: 41},
|
||
|
|
{SrcStart: 23, SrcEnd: 30, TargetStart: 125, TargetEnd: 135},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
want: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 35, TargetEnd: 41},
|
||
|
|
{SrcStart: 23, SrcEnd: 30, TargetStart: 125, TargetEnd: 135},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Consecutive Ranges with First Element Overlap",
|
||
|
|
mr: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 34, TargetEnd: 41},
|
||
|
|
{SrcStart: 33, SrcEnd: 40, TargetStart: 35, TargetEnd: 42},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
want: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 36, TargetStart: 25, TargetEnd: 41},
|
||
|
|
{SrcStart: 33, SrcEnd: 40, TargetStart: 35, TargetEnd: 42},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Consecutive Ranges with Overlap",
|
||
|
|
mr: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 34, TargetEnd: 41},
|
||
|
|
{SrcStart: 33, SrcEnd: 40, TargetStart: 45, TargetEnd: 52},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
want: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 36, TargetStart: 25, TargetEnd: 41},
|
||
|
|
{SrcStart: 33, SrcEnd: 40, TargetStart: 45, TargetEnd: 52},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Consecutive Ranges with Previous Deep Overlap",
|
||
|
|
mr: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
{SrcStart: 120, SrcEnd: 130, TargetStart: 37, TargetEnd: 47},
|
||
|
|
{SrcStart: 122, SrcEnd: 132, TargetStart: 39, TargetEnd: 49},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 34, TargetEnd: 41},
|
||
|
|
{SrcStart: 33, SrcEnd: 40, TargetStart: 45, TargetEnd: 52},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
want: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 36, TargetStart: 25, TargetEnd: 41},
|
||
|
|
{SrcStart: 33, SrcEnd: 40, TargetStart: 45, TargetEnd: 52},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
description: "Consecutive Ranges with Deep Overlap",
|
||
|
|
mr: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
{SrcStart: 24, SrcEnd: 34, TargetStart: 29, TargetEnd: 39},
|
||
|
|
{SrcStart: 120, SrcEnd: 130, TargetStart: 37, TargetEnd: 47},
|
||
|
|
{SrcStart: 122, SrcEnd: 132, TargetStart: 39, TargetEnd: 49},
|
||
|
|
},
|
||
|
|
{
|
||
|
|
{SrcStart: 3, SrcEnd: 10, TargetStart: 26, TargetEnd: 33},
|
||
|
|
{SrcStart: 5, SrcEnd: 12, TargetStart: 28, TargetEnd: 35},
|
||
|
|
{SrcStart: 25, SrcEnd: 35, TargetStart: 31, TargetEnd: 41},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
want: []MatchRanges{
|
||
|
|
{
|
||
|
|
{SrcStart: 0, SrcEnd: 10, TargetStart: 5, TargetEnd: 15},
|
||
|
|
{SrcStart: 20, SrcEnd: 30, TargetStart: 25, TargetEnd: 35},
|
||
|
|
{SrcStart: 24, SrcEnd: 34, TargetStart: 29, TargetEnd: 39},
|
||
|
|
{SrcStart: 25, SrcEnd: 35, TargetStart: 31, TargetEnd: 41},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
},
|
||
|
|
}
|
||
|
|
|
||
|
|
for _, tt := range tests {
|
||
|
|
got := mergeConsecutiveRanges(tt.mr)
|
||
|
|
if len(got) != len(tt.want) {
|
||
|
|
t.Errorf("Number of matches %v, want %v", len(got), len(tt.want))
|
||
|
|
}
|
||
|
|
if !reflect.DeepEqual(got, tt.want) {
|
||
|
|
t.Errorf("SplitRanges(%q) = %v, want %v", tt.description, got, tt.want)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|