unplugged-system/external/licenseclassifier/tools/identify_license/backend/backend.go

167 lines
4.8 KiB
Go

// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package backend contains the necessary functions to classify a license.
package backend
import (
"context"
"fmt"
"io/ioutil"
"log"
"sync"
"time"
"github.com/google/licenseclassifier"
"github.com/google/licenseclassifier/commentparser"
"github.com/google/licenseclassifier/commentparser/language"
"github.com/google/licenseclassifier/tools/identify_license/results"
)
// ClassifierInterface is the interface each backend must implement.
// ClassifierBackend in this file provides all four methods.
type ClassifierInterface interface {
// Close presumably lets a backend release whatever it holds; the
// ClassifierBackend implementation in this file does nothing.
Close()
// ClassifyLicenses classifies the named files and returns the errors that
// were encountered. headers is handed through to the classifier
// (NOTE(review): its exact meaning is defined by the licenseclassifier
// package, not here — confirm before relying on it).
ClassifyLicenses(filenames []string, headers bool) []error
// ClassifyLicensesWithContext is ClassifyLicenses with a caller-supplied
// context that governs timeout and cancellation.
ClassifyLicensesWithContext(ctx context.Context, filenames []string, headers bool) []error
// GetResults returns the classifications gathered by the Classify* calls.
GetResults() results.LicenseTypes
}
// ClassifierBackend is an object that handles classifying a license.
// It contains a sync.Mutex, so it should be used through a pointer (which is
// what New returns) rather than copied.
type ClassifierBackend struct {
// results accumulates one entry per license match; classifyLicense
// appends to it.
results results.LicenseTypes
// mu is held around each append to results, because files are classified
// from multiple goroutines.
mu sync.Mutex
// classifier is the underlying license classifier; it is set by New.
classifier *licenseclassifier.License
}
// New creates a new backend working on the local filesystem.
//
// threshold is forwarded unchanged to the licenseclassifier constructor.
// When forbiddenOnly is true the classifier is built with
// licenseclassifier.NewWithForbiddenLicenses, otherwise with
// licenseclassifier.New. Any constructor error is returned as-is together
// with a nil backend.
func New(threshold float64, forbiddenOnly bool) (*ClassifierBackend, error) {
	// construct picks the constructor that matches forbiddenOnly.
	construct := func() (*licenseclassifier.License, error) {
		if forbiddenOnly {
			return licenseclassifier.NewWithForbiddenLicenses(threshold)
		}
		return licenseclassifier.New(threshold)
	}

	engine, err := construct()
	if err != nil {
		return nil, err
	}

	backend := &ClassifierBackend{classifier: engine}
	return backend, nil
}
// Close is a no-op: this backend holds nothing that needs closing. The
// method exists so that *ClassifierBackend provides the Close method listed
// in ClassifierInterface.
func (b *ClassifierBackend) Close() {
}
// ClassifyLicenses runs the license classifier over the given files.
//
// It is a convenience wrapper around ClassifyLicensesWithContext that
// supplies context.Background(), i.e. a context that is never canceled and
// has no deadline. filenames and headers are forwarded unchanged, and the
// errors reported by ClassifyLicensesWithContext are returned as-is.
func (b *ClassifierBackend) ClassifyLicenses(filenames []string, headers bool) (errors []error) {
	// No caller-supplied context, so use the root context.
	background := context.Background()
	errors = b.ClassifyLicensesWithContext(background, filenames, headers)
	return errors
}
// ClassifyLicensesWithContext runs the license classifier over the given
// files, respecting the timeout and cancellation of the provided context.
//
// The files are processed by a pool of worker goroutines; the pool holds one
// worker per file, capped at 1000. Before a worker starts on a file it checks
// ctx: once ctx is done, every remaining file is reported as an error instead
// of being classified. A classification that is already in progress is not
// interrupted, because classifyLicense does not take a context.
//
// headers is forwarded unchanged to classifyLicense.
//
// The returned slice holds at most one error per file: either the error from
// classifyLicense or the "not classified due to context completion" error.
// Errors are collected from concurrent workers, so their order is
// unspecified. A nil slice means every file was classified without error.
func (b *ClassifierBackend) ClassifyLicensesWithContext(ctx context.Context, filenames []string, headers bool) (errors []error) {
	if len(filenames) == 0 {
		return nil
	}

	// Queue every file up front and close the channel so that workers can
	// simply range over it until it is drained.
	files := make(chan string, len(filenames))
	for _, f := range filenames {
		files <- f
	}
	close(files)

	// Each file yields at most one error, so this buffer never blocks a worker.
	errs := make(chan error, len(filenames))

	// Use a bounded pool because the OS limits the number of files that can
	// be open at any one time. There is no point in starting more workers
	// than there are files.
	const maxTasks = 1000
	numTasks := len(filenames)
	if numTasks > maxTasks {
		numTasks = maxTasks
	}

	var wg sync.WaitGroup
	wg.Add(numTasks)
	for i := 0; i < numTasks; i++ {
		go func() {
			// Ensure that however this function terminates, the wait group
			// is unblocked.
			defer wg.Done()
			// Ranging over the channel (rather than treating "" as the
			// end-of-work marker) means an empty filename is handed to
			// classifyLicense and reported like any other unreadable file,
			// instead of being dropped and stopping this worker.
			for filename := range files {
				// If the context is done, record that the file was not
				// classified due to the context's termination.
				if err := ctx.Err(); err != nil {
					errs <- fmt.Errorf("file %s not classified due to context completion: %v", filename, err)
					continue
				}
				if err := b.classifyLicense(filename, headers); err != nil {
					errs <- err
				}
			}
		}()
	}
	wg.Wait()
	close(errs)

	for err := range errs {
		errors = append(errors, err)
	}
	return errors
}
// classifyLicense reads one file and records every license match found in
// it. It is run from the worker goroutines started by
// ClassifyLicensesWithContext.
//
// If the language classifier does not recognise the file, the whole file
// content is matched as a single string. Otherwise the file is run through
// the comment parser for the detected language and each chunk it yields is
// matched on its own. headers is passed straight to the classifier's
// MultipleMatch call.
//
// The only error returned is a failure to read the file; finding no matches
// is not an error.
func (b *ClassifierBackend) classifyLicense(filename string, headers bool) error {
	data, err := ioutil.ReadFile(filename)
	if err != nil {
		return fmt.Errorf("unable to read %q: %v", filename, err)
	}

	// scan runs the classifier over one piece of text and appends each match
	// to the shared result list, taking the lock once per match.
	scan := func(text string) {
		for _, m := range b.classifier.MultipleMatch(text, headers) {
			entry := &results.LicenseType{
				Filename:   filename,
				Name:       m.Name,
				Confidence: m.Confidence,
				Offset:     m.Offset,
				Extent:     m.Extent,
			}
			b.mu.Lock()
			b.results = append(b.results, entry)
			b.mu.Unlock()
		}
	}

	log.Printf("Classifying license(s): %s", filename)
	began := time.Now()

	lang := language.ClassifyLanguage(filename)
	if lang != language.Unknown {
		log.Printf("detected language: %v", lang)
		parsed := commentparser.Parse(data, lang)
		for chunk := range parsed.ChunkIterator() {
			scan(chunk.String())
		}
	} else {
		// Unrecognised language: classify the raw file content in one go.
		scan(string(data))
	}

	log.Printf("Finished Classifying License %q: %v", filename, time.Since(began))
	return nil
}
// GetResults returns the results of the classifications.
//
// The result list is read while holding the same mutex that classifyLicense
// holds when appending to it, so a call that overlaps with a running
// classification does not race with those appends. The returned slice is not
// a copy: it shares its backing storage with the backend's internal list, so
// callers that want a complete and stable view should call GetResults only
// after ClassifyLicenses / ClassifyLicensesWithContext has returned.
func (b *ClassifierBackend) GetResults() results.LicenseTypes {
	b.mu.Lock()
	defer b.mu.Unlock()
	return b.results
}