#!/usr/bin/env python3
#
# Collects the copyright / license notices found in the files under a target
# directory and prints them as a plain NOTICE text, an HTML table, or JSON.
import argparse
import contextlib
import html
import json
import os
import re
import subprocess
import sys
import tempfile
from enum import Enum
from pathlib import Path
from typing import Sequence
from typing import Tuple

from fontTools import ttLib
# List of specific files to be ignored. Paths are relative to the target
# directory; do_check() aborts if a listed file is not found during the walk.
IGNORE_FILE_NAME = [
    # Exclude myself
    "generate_notice.py",

    # License files
    "LICENSE",
    "LICENSE_APACHE2.TXT",
    "LICENSE_FSFAP.TXT",
    "LICENSE_GPLv2.TXT",
    "LICENSE_GPLv2_WITH_AUTOCONF_EXCEPTION.TXT",
    "LICENSE_GPLv3_WITH_AUTOCONF_EXCEPTION.TXT",
    "LICENSE_HPND_SELL_VARIANT.TXT",
    "LICENSE_ISC.TXT",
    "LICENSE_MIT_MODERN_VARIANT.TXT",
    "LICENSE_OFL.TXT",
    "METADATA",
    "MODULE_LICENSE_MIT",
    "NOTICE",

    # dictionary which has Copyright word
    "perf/texts/en-words.txt",

    # broken unreadable font file for fuzzing target
    "test/fuzzing/fonts/sbix-extents.ttf",
]
# Top-level directories whose files may lack a copyright notice without
# aborting the run (checked by assert_mandatory_copyright()).
IGNORE_DIR_IF_NO_COPYRIGHT = [
    "test",
    "perf",
]
# Files (relative to the target directory) that are expected to contain no
# "Copyright" text. do_file() ticks an entry off when it meets such a file and
# do_check() aborts if any entry is left over after the walk.
NO_COPYRIGHT_FILES = [
    ".ci/build-win32.sh",
    ".ci/build-win64.sh",
    ".ci/deploy-docs.sh",
    ".ci/publish_release_artifact.sh",
    ".ci/win32-cross-file.txt",
    ".ci/win64-cross-file.txt",
    ".circleci/config.yml",
    ".clang-format",
    ".codecov.yml",
    ".editorconfig",
    ".github/dependabot.yml",
    ".github/workflows/arm-ci.yml",
    ".github/workflows/cifuzz.yml",
    ".github/workflows/configs-build.yml",
    ".github/workflows/coverity-scan.yml",
    ".github/workflows/linux-ci.yml",
    ".github/workflows/macos-ci.yml",
    ".github/workflows/msvc-ci.yml",
    ".github/workflows/msys2-ci.yml",
    "AUTHORS",
    "BUILD.md",
    "CMakeLists.txt",
    "CONFIG.md",
    "Makefile.am",
    "NEWS",
    "OWNERS",
    "README",
    "README.android",
    "README.md",
    "README.mingw.md",
    "README.python.md",
    "README.version",
    "RELEASING.md",
    "TESTING.md",
    "TEST_MAPPING",
    "THANKS",
    "autogen.sh",
    "configure.ac",
    "docs/HarfBuzz.png",
    "docs/HarfBuzz.svg",
    "docs/Makefile.am",
    "docs/features.dot",
    "docs/harfbuzz-docs.xml",
    "docs/harfbuzz-overrides.txt",
    "docs/harfbuzz-sections.txt",
    "docs/meson.build",
    "docs/repacker.md",
    "docs/serializer.md",
    "docs/subset-preprocessing.md",
    "docs/usermanual-buffers-language-script-and-direction.xml",
    "docs/usermanual-clusters.xml",
    "docs/usermanual-fonts-and-faces.xml",
    "docs/usermanual-getting-started.xml",
    "docs/usermanual-glyph-information.xml",
    "docs/usermanual-install-harfbuzz.xml",
    "docs/usermanual-integration.xml",
    "docs/usermanual-object-model.xml",
    "docs/usermanual-opentype-features.xml",
    "docs/usermanual-shaping-concepts.xml",
    "docs/usermanual-utilities.xml",
    "docs/usermanual-what-is-harfbuzz.xml",
    "docs/version.xml.in",
    "harfbuzz.doap",
    "meson.build",
    "meson_options.txt",
    "mingw-configure.sh",
    "replace-enum-strings.cmake",
    "src/ArabicPUASimplified.txt",
    "src/ArabicPUATraditional.txt",
    "src/Makefile.am",
    "src/Makefile.sources",
    "src/OT/Layout/GPOS/Anchor.hh",
    "src/OT/Layout/GPOS/AnchorFormat1.hh",
    "src/OT/Layout/GPOS/AnchorFormat2.hh",
    "src/OT/Layout/GPOS/AnchorFormat3.hh",
    "src/OT/Layout/GPOS/AnchorMatrix.hh",
    "src/OT/Layout/GPOS/ChainContextPos.hh",
    "src/OT/Layout/GPOS/Common.hh",
    "src/OT/Layout/GPOS/ContextPos.hh",
    "src/OT/Layout/GPOS/CursivePos.hh",
    "src/OT/Layout/GPOS/CursivePosFormat1.hh",
    "src/OT/Layout/GPOS/ExtensionPos.hh",
    "src/OT/Layout/GPOS/GPOS.hh",
    "src/OT/Layout/GPOS/LigatureArray.hh",
    "src/OT/Layout/GPOS/MarkArray.hh",
    "src/OT/Layout/GPOS/MarkBasePos.hh",
    "src/OT/Layout/GPOS/MarkBasePosFormat1.hh",
    "src/OT/Layout/GPOS/MarkLigPos.hh",
    "src/OT/Layout/GPOS/MarkLigPosFormat1.hh",
    "src/OT/Layout/GPOS/MarkMarkPos.hh",
    "src/OT/Layout/GPOS/MarkMarkPosFormat1.hh",
    "src/OT/Layout/GPOS/MarkRecord.hh",
    "src/OT/Layout/GPOS/PairPos.hh",
    "src/OT/Layout/GPOS/PairPosFormat1.hh",
    "src/OT/Layout/GPOS/PairPosFormat2.hh",
    "src/OT/Layout/GPOS/PairSet.hh",
    "src/OT/Layout/GPOS/PairValueRecord.hh",
    "src/OT/Layout/GPOS/PosLookup.hh",
    "src/OT/Layout/GPOS/PosLookupSubTable.hh",
    "src/OT/Layout/GPOS/SinglePos.hh",
    "src/OT/Layout/GPOS/SinglePosFormat1.hh",
    "src/OT/Layout/GPOS/SinglePosFormat2.hh",
    "src/OT/Layout/GPOS/ValueFormat.hh",
    "src/OT/Layout/GSUB/AlternateSet.hh",
    "src/OT/Layout/GSUB/AlternateSubst.hh",
    "src/OT/Layout/GSUB/AlternateSubstFormat1.hh",
    "src/OT/Layout/GSUB/ChainContextSubst.hh",
    "src/OT/Layout/GSUB/Common.hh",
    "src/OT/Layout/GSUB/ContextSubst.hh",
    "src/OT/Layout/GSUB/ExtensionSubst.hh",
    "src/OT/Layout/GSUB/GSUB.hh",
    "src/OT/Layout/GSUB/Ligature.hh",
    "src/OT/Layout/GSUB/LigatureSet.hh",
    "src/OT/Layout/GSUB/LigatureSubst.hh",
    "src/OT/Layout/GSUB/LigatureSubstFormat1.hh",
    "src/OT/Layout/GSUB/MultipleSubst.hh",
    "src/OT/Layout/GSUB/MultipleSubstFormat1.hh",
    "src/OT/Layout/GSUB/ReverseChainSingleSubst.hh",
    "src/OT/Layout/GSUB/ReverseChainSingleSubstFormat1.hh",
    "src/OT/Layout/GSUB/Sequence.hh",
    "src/OT/Layout/GSUB/SingleSubst.hh",
    "src/OT/Layout/GSUB/SingleSubstFormat1.hh",
    "src/OT/Layout/GSUB/SingleSubstFormat2.hh",
    "src/OT/Layout/GSUB/SubstLookup.hh",
    "src/OT/Layout/GSUB/SubstLookupSubTable.hh",
    "src/OT/glyf/CompositeGlyph.hh",
    "src/OT/glyf/Glyph.hh",
    "src/OT/glyf/GlyphHeader.hh",
    "src/OT/glyf/SimpleGlyph.hh",
    "src/OT/glyf/SubsetGlyph.hh",
    "src/OT/glyf/VarCompositeGlyph.hh",
    "src/OT/glyf/composite-iter.hh",
    "src/OT/glyf/coord-setter.hh",
    "src/OT/glyf/glyf-helpers.hh",
    "src/OT/glyf/glyf.hh",
    "src/OT/glyf/loca.hh",
    "src/OT/glyf/path-builder.hh",
    "src/check-c-linkage-decls.py",
    "src/check-externs.py",
    "src/check-header-guards.py",
    "src/check-includes.py",
    "src/check-libstdc++.py",
    "src/check-static-inits.py",
    "src/check-symbols.py",
    "src/fix_get_types.py",
    "src/gen-arabic-joining-list.py",
    "src/gen-arabic-pua.py",
    "src/gen-arabic-table.py",
    "src/gen-def.py",
    "src/gen-emoji-table.py",
    "src/gen-harfbuzzcc.py",
    "src/gen-hb-version.py",
    "src/gen-indic-table.py",
    "src/gen-os2-unicode-ranges.py",
    "src/gen-ragel-artifacts.py",
    "src/gen-tag-table.py",
    "src/gen-ucd-table.py",
    "src/gen-use-table.py",
    "src/gen-vowel-constraints.py",
    "src/harfbuzz-config.cmake.in",
    "src/harfbuzz-gobject.pc.in",
    "src/harfbuzz-icu.pc.in",
    "src/harfbuzz-subset.cc",
    "src/harfbuzz-subset.pc.in",
    "src/harfbuzz.cc",
    "src/harfbuzz.pc.in",
    "src/hb-ot-shaper-arabic-joining-list.hh",
    "src/hb-ot-shaper-arabic-pua.hh",
    "src/hb-ot-shaper-arabic-table.hh",
    "src/hb-ot-shaper-indic-table.cc",
    "src/hb-ot-shaper-use-table.hh",
    "src/hb-ot-shaper-vowel-constraints.cc",
    "src/hb-ot-tag-table.hh",
    "src/hb-ucd-table.hh",
    "src/hb-unicode-emoji-table.hh",
    "src/meson.build",
    "src/ms-use/IndicPositionalCategory-Additional.txt",
    "src/ms-use/IndicShapingInvalidCluster.txt",
    "src/ms-use/IndicSyllabicCategory-Additional.txt",
    "src/sample.py",
    "src/test-use-table.cc",
    "src/update-unicode-tables.make",
    "subprojects/.gitignore",
    "subprojects/cairo.wrap",
    "subprojects/freetype2.wrap",
    "subprojects/glib.wrap",
    "subprojects/google-benchmark.wrap",
    "subprojects/packagefiles/ragel/meson.build",
    "subprojects/ragel.wrap",
    "subprojects/zlib.wrap",
    "util/Makefile.am",
    "util/Makefile.sources",
    "util/meson.build",
]
class CommentType(Enum):
    """Kinds of source a copyright notice can be extracted from."""

    C_STYLE_BLOCK = 1  # /* ... */
    C_STYLE_BLOCK_AS_LINE = 2  # /* ... */ but uses multiple lines of block comments.
    C_STYLE_LINE = 3  # // ...
    SCRIPT_STYLE_HASH = 4  # # ...
    OPENTYPE_NAME = 5
    OPENTYPE_COLLECTION_NAME = 6
    UNKNOWN = 10000
# Helper function of showing error message and immediate exit.
def fatal(msg: str):
    """Write ``msg`` and a newline to stderr, then exit with status 1.

    ``msg`` is passed through ``str()``, so non-string objects are accepted.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(str(msg))
    sys.stderr.write("\n")
    sys.exit(1)
def warn(msg: str):
    """Write ``msg`` (converted with ``str()``) and a newline to stderr."""
    sys.stderr.write(str(msg))
    sys.stderr.write("\n")
def debug(msg: str):
    """Debug hook that currently does nothing.

    Uncomment the two writes below to print ``msg`` to stderr while debugging.
    """
    # sys.stderr.write(str(msg))
    # sys.stderr.write("\n")
    pass
def cleanup_and_join(out_lines: Sequence[str]):
    """Normalize extracted notice lines and join them with newlines.

    Trailing blank (whitespace-only) lines are dropped, then indentation that
    is shared by every line is removed one space column at a time.

    Args:
        out_lines: Lines of the notice; the sequence itself is not modified.

    Returns:
        The cleaned lines joined with "\n". Aborts through ``fatal`` when no
        text is left after dropping the blank lines.
    """
    # Work on a private copy so the caller's list is never mutated.
    cleaned = list(out_lines)

    # Check for emptiness first: an empty or all-blank input must reach the
    # fatal() below instead of raising IndexError on cleaned[-1].
    while cleaned and not cleaned[-1].strip():
        cleaned.pop()

    if not cleaned:
        fatal("Failed to get copyright info")

    # If all lines start with a space (or are empty), strip one column out.
    # The last line is non-blank here, so the loop stops as soon as that line
    # no longer starts with a space.
    while all(not x or x[0] == ' ' for x in cleaned):
        cleaned = [x[1:] for x in cleaned]

    return "\n".join(cleaned)
def get_comment_type(copyright_line: str, path_str: str) -> CommentType:
    """Classify the comment syntax of a line that holds a copyright notice.

    Args:
        copyright_line: The line containing the notice.
        path_str: Path of the file being scanned; not consulted, kept so the
            signature stays compatible with existing callers.

    Returns:
        SCRIPT_STYLE_HASH when the line starts with "#", C_STYLE_LINE when it
        starts with "//", and C_STYLE_BLOCK for everything else.
    """
    if copyright_line.startswith("#"):
        return CommentType.SCRIPT_STYLE_HASH
    if copyright_line.startswith("//"):
        return CommentType.C_STYLE_LINE
    return CommentType.C_STYLE_BLOCK
def extract_copyright_font(path_str: str) -> str:
    """Extract the notice embedded in a font file, selected by file suffix.

    Args:
        path_str: Path of the font file.

    Returns:
        The notice of a single font (.ttf/.otf/.dfont) or of a collection
        (.ttc/.otc), or None when the suffix is none of these.
    """
    path = Path(path_str)
    suffix = path.suffix

    if suffix in ('.ttf', '.otf', '.dfont'):
        return extract_from_opentype_name(path, 0)
    if suffix in ('.ttc', '.otc'):
        return extract_from_opentype_collection_name(path)

    # Any other suffix: report "no notice" explicitly so the caller's
    # `if not notice` check handles it.
    return None
# Extract copyright notice and returns next index.
def extract_copyright_at(lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract the notice that contains line ``i`` and say where to resume.

    The comment style of ``lines[i]`` picks the extractor to run.

    Args:
        lines: All lines of the file.
        i: Index of the line that was recognized as a copyright line.
        path: Path of the file, forwarded to the extractor.

    Returns:
        A ``(notice, next_index)`` tuple as produced by the extractor.
    """
    comment_type = get_comment_type(lines[i], path)

    if comment_type == CommentType.C_STYLE_BLOCK:
        return extract_from_c_style_block_at(lines, i, path)
    if comment_type == CommentType.C_STYLE_LINE:
        return extract_from_c_style_lines_at(lines, i, path)
    if comment_type == CommentType.SCRIPT_STYLE_HASH:
        return extract_from_script_hash_at(lines, i, path)

    fatal("Unknown comment style: %s" % lines[i])
def extract_from_opentype_collection_name(path: str) -> str:
    """Collect the notices of every font in a font collection file.

    The first 12 bytes of the file are read: bytes 0-3 must be the tag
    "ttcf" and bytes 8-11 are taken as the big-endian number of fonts.

    Args:
        path: Path of the collection file.

    Returns:
        The distinct notices of the member fonts, in order of first
        appearance, joined by a blank line. Empty string when no font
        yields a notice.
    """
    with open(path, mode="rb") as f:
        head = f.read(12)

    # Compare raw bytes: decoding an arbitrary 4-byte tag may raise
    # UnicodeDecodeError before the magic number check can report the problem.
    if head[0:4] != b'ttcf':
        fatal('Invalid magic number for TTC file: %s' % path)
    num_fonts = int.from_bytes(head[8:12], byteorder="big")

    # A dict is used as an ordered set so that the joined result does not
    # depend on set iteration order.
    licenses = {}
    for font_index in range(num_fonts):
        notice = extract_from_opentype_name(path, font_index)
        # Fonts without any notice yield None, which str.join() would reject.
        if notice:
            licenses[notice] = None

    return '\n\n'.join(licenses)
def extract_from_opentype_name(path: str, index: int) -> str:
    """Build the notice text of one font from its name (or CFF) table.

    The copyright string is name ID 0; when the font has none, the first CFF
    string containing "Copyright" is used instead. Name ID 13 and name ID 14
    are appended after it, each separated by a blank line, when present.

    Args:
        path: Path of the font file.
        index: Font number inside the file, passed to ``ttLib.TTFont``.

    Returns:
        The notice text, or None when no copyright string is found.
    """
    # Preference of (platformID, platEncID) pairs; a lower rank wins.
    # Pairs not listed here rank last (10000).
    encoding_rank = {
        (3, 10): 0,
        (0, 6): 1,
        (0, 4): 2,
        (3, 1): 3,
        (0, 3): 4,
        (0, 2): 5,
        (0, 1): 6,
        (0, 0): 7,
    }

    def get_score(name) -> int:
        return encoding_rank.get((name.platformID, name.platEncID), 10000)

    def get_preferred_name(nameID: int, ttf):
        """Return the best-ranked name record with ``nameID``, or None."""
        if 'name' not in ttf:
            return None

        candidates = [n for n in ttf['name'].names if n.nameID == nameID]
        # min() keeps the first of equally ranked records, which matches a
        # scan that only replaces the best record on a strictly lower score.
        return min(candidates, key=get_score, default=None)

    def get_notice_from_cff(ttf):
        """Return the first CFF string containing "Copyright", or None."""
        if 'CFF ' not in ttf:
            return None

        # Looks like there is no way of getting Notice line in CFF table.
        # Use the line that has "Copyright" in the string pool.
        cff = ttf['CFF '].cff
        for string in cff.strings:
            if 'Copyright' in string:
                return string
        return None

    with contextlib.closing(ttLib.TTFont(path, 0, fontNumber=index)) as ttf:
        notice = get_preferred_name(0, ttf)
        if not notice:
            notice = get_notice_from_cff(ttf)
        if not notice:
            return None

        parts = [str(notice)]

        license_description = get_preferred_name(13, ttf)
        if license_description:
            parts.append(str(license_description))

        license_url = get_preferred_name(14, ttf)
        if license_url:
            parts.append(str(license_url))

        return "\n\n".join(parts)
def extract_from_c_style_lines_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as consecutive ``//`` line comments.

    The notice spans from line ``i`` up to (not including) the first line
    that does not start with "//".

    Args:
        lines: All lines of the file.
        i: Index of the first line of the notice.
        path: Path of the file; not used by this extractor.

    Returns:
        ``(notice, next_index)`` where ``next_index`` is one past the line
        that ended the notice.
    """
    start = i
    while i < len(lines) and lines[i].startswith("//"):
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = []
    for line in lines[start:end]:
        if line.startswith("//# "):  # Android.bp uses //# style
            out_lines.append(line[4:])
        elif line.startswith("//#"):  # Android.bp uses //# style
            out_lines.append(line[3:])
        elif line.startswith("// "):
            out_lines.append(line[3:])
        elif line == "//":
            out_lines.append("")
        else:
            out_lines.append(line)

    return (cleanup_and_join(out_lines), i + 1)
def extract_from_script_hash_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written as consecutive ``#`` comment lines.

    The notice spans from line ``i`` up to (not including) the first line
    that contains no "#" at all, or the first of two consecutive lines that
    are exactly "#".

    Args:
        lines: All lines of the file.
        i: Index of the first line of the notice.
        path: Path of the file; not used by this extractor.

    Returns:
        ``(notice, next_index)``. ``notice`` is None when line ``i`` does not
        start with "#" after stripping whitespace.
    """
    # startswith() instead of indexing [0] so a blank line cannot raise.
    if not lines[i].strip().startswith("#"):
        return (None, i + 1)

    def is_copyright_end(idx: int) -> bool:
        if "#" not in lines[idx]:
            return True
        # treat double spacing as end of license header
        # (bounds-checked: the last line of the file has no successor)
        if lines[idx] == "#" and idx + 1 < len(lines) and lines[idx + 1] == "#":
            return True
        return False

    start = i
    while i < len(lines) and not is_copyright_end(i):
        i += 1
    end = i

    if start == end:
        fatal("Failed to get copyright info")

    out_lines = []
    for line in lines[start:end]:
        if line.startswith("# "):
            out_lines.append(line[2:])
        elif line == "#":
            out_lines.append("")
        else:
            out_lines.append(line)

    return (cleanup_and_join(out_lines), i + 1)
def extract_from_c_style_block_at(
        lines: Sequence[str], i: int, path: str) -> Tuple[str, int]:
    """Extract a notice written inside a ``/* ... */`` block comment.

    The notice starts at line ``i`` and ends (inclusively) at the first later
    line that contains "*/", or that is the first of two consecutive " *"
    lines, or the first of two consecutive empty lines.

    Args:
        lines: All lines of the file.
        i: Index of the first line of the notice.
        path: Path of the file; used for one file-specific cleanup rule.

    Returns:
        ``(notice, next_index)`` where ``next_index`` is one past the line
        that ended the notice.
    """

    def is_copyright_end(idx: int) -> bool:
        if "*/" in lines[idx]:
            return True
        # The "double blank" rules look one line ahead, which does not exist
        # for the last line of the file.
        if idx + 1 >= len(lines):
            return False
        if lines[idx] == " *" and lines[idx + 1] == " *":
            return True
        if lines[idx] == "" and lines[idx + 1] == "":
            return True
        return False

    start = i
    i += 1  # include at least one line
    while i < len(lines) and not is_copyright_end(i):
        i += 1
    end = i + 1

    # hb-aots-tester.cpp has underline separater which can be dropped.
    drop_underscores = path.endswith("test/shape/data/aots/hb-aots-tester.cpp")

    out_lines = []
    for line in lines[start:end]:
        clean_line = line

        # Strip beginning "/*" chars
        if clean_line.startswith("/* "):
            clean_line = clean_line[3:]
        if clean_line == "/*":
            clean_line = clean_line[2:]

        # Strip ending "*/" chars
        if clean_line.endswith(" */"):
            clean_line = clean_line[:-3]
        if clean_line.endswith("*/"):
            clean_line = clean_line[:-2]

        # Strip starting " *" chars
        if clean_line.startswith(" * "):
            clean_line = clean_line[3:]
        if clean_line == " *":
            clean_line = clean_line[2:]

        if drop_underscores:
            clean_line = clean_line.replace("_", "")

        # Strip trailing spaces
        out_lines.append(clean_line.rstrip())

    return (cleanup_and_join(out_lines), i + 1)
# Returns true if the line shows the start of copyright notice.
def is_copyright_line(line: str, path: str) -> bool:
    """Tell whether ``line`` starts a copyright notice.

    A line qualifies when it contains "Copyright" and none of the known
    false positives: the quoted forms `Copyright' and "Copyright", the
    identifier OpCode_Copyright, or HB_OT_NAME_ID_COPYRIGHT in a file whose
    path ends with src/hb-ot-name.h.
    """
    if "Copyright" not in line:
        return False

    # For avoiding unexpected mismatches, exclude quoted Copyright string.
    if "`Copyright'" in line or "\"Copyright\"" in line:
        return False

    if "OpCode_Copyright" in line:
        return False

    if path.endswith("src/hb-ot-name.h") and "HB_OT_NAME_ID_COPYRIGHT" in line:
        return False

    return True
def assert_mandatory_copyright(path_str: str):
    """Abort unless a file without a notice lives in a tolerated directory.

    The first component of the normalized path is looked up in
    IGNORE_DIR_IF_NO_COPYRIGHT; on a match the function returns silently,
    otherwise it aborts through ``fatal``.

    Args:
        path_str: Path of the file that has no copyright notice.
    """
    path = Path(path_str)
    toplevel_dir = str(path).split(os.sep)[0]

    if toplevel_dir in IGNORE_DIR_IF_NO_COPYRIGHT:
        return

    fatal("%s does not contain Copyright line" % path)
# Extract the copyright notice and put it into copyrights arg.
def do_file(path: str, copyrights: dict, no_copyright_files: set):
    """Collect the copyright notices of one file.

    Args:
        path: File to inspect.
        copyrights: Mapping of notice text to the list of paths that carry
            it; updated in place.
        no_copyright_files: Paths expected to contain no "Copyright" text;
            ``path`` is removed from it when the file indeed has none.
    """
    dirname = os.path.dirname(path)

    is_font = (dirname.endswith('./test/fuzzing/fonts') or
               Path(path).suffix in ['.ttf', '.otf', '.dfont', '.ttc', '.otc'])

    if is_font:
        notice = extract_copyright_font(path)
        if not notice:
            assert_mandatory_copyright(path)
            return

        copyrights.setdefault(notice, []).append(path)
        return

    # Text file: try UTF-8 first and fall back to ISO-8859-1, which can
    # decode any byte sequence.
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        content = raw.decode("iso-8859-1")

    if "Copyright" not in content:
        if path in no_copyright_files:
            no_copyright_files.remove(path)
        else:
            assert_mandatory_copyright(path)
        return

    lines = content.splitlines()

    # The COPYING in the in-house dir has full OFL license with description.
    # Use the OFL license description body.
    if path.endswith("test/shape/data/in-house/COPYING"):
        notice = cleanup_and_join(lines[9:])
        copyrights.setdefault(notice, []).append(path)
        return

    # The COPYING in the top dir has MIT-Modern-Variant license with description.
    # Use the entire file as a license notice.
    if path.endswith("COPYING") and str(Path(path)) == 'COPYING':
        notice = cleanup_and_join(lines)
        copyrights.setdefault(notice, []).append(path)
        return

    i = 0
    license_found = False
    while i < len(lines):
        if not is_copyright_line(lines[i], path):
            i += 1
            continue

        # The extractor reports where scanning must resume.
        (notice, nexti) = extract_copyright_at(lines, i, path)
        if notice:
            copyrights.setdefault(notice, []).append(path)
            license_found = True
        i = nexti

    if not license_found:
        assert_mandatory_copyright(path)
def do_check(path, format):
    """Walk ``path``, collect every notice and print them in ``format``.

    Files listed in IGNORE_FILE_NAME are skipped and any ".git" directory is
    not descended into. The run aborts through ``fatal`` when an entry of
    IGNORE_FILE_NAME or NO_COPYRIGHT_FILES was not consumed by the walk.

    Args:
        path: Target directory; a trailing "/" is appended when missing.
        format: A ``Format`` member selecting the printer.
    """
    if not path.endswith('/'):  # make sure the path ends with slash
        path = path + '/'

    file_to_ignore = {os.path.join(path, x) for x in IGNORE_FILE_NAME}
    no_copyright_files = {os.path.join(path, x) for x in NO_COPYRIGHT_FILES}
    copyrights = {}

    for directory, sub_directories, filenames in os.walk(path):
        # skip .git directory (pruning in place stops os.walk descending)
        if ".git" in sub_directories:
            sub_directories.remove(".git")

        for fname in filenames:
            fpath = os.path.join(directory, fname)
            if fpath in file_to_ignore:
                file_to_ignore.remove(fpath)
                continue

            do_file(fpath, copyrights, no_copyright_files)

    # Leftovers are sorted so the error report is stable between runs.
    if file_to_ignore:
        fatal("Following files are listed in IGNORE_FILE_NAME but doesn't exists,.\n"
              + "\n".join(sorted(file_to_ignore)))

    # An entry is only ticked off by a file that exists AND has no
    # "Copyright" text, so a leftover means either condition failed.
    if no_copyright_files:
        fatal("Following files are listed in NO_COPYRIGHT_FILES but don't exist "
              "or contain a Copyright line.\n"
              + "\n".join(sorted(no_copyright_files)))

    if format == Format.notice:
        print_notice(copyrights, False)
    elif format == Format.notice_with_filename:
        print_notice(copyrights, True)
    elif format == Format.html:
        print_html(copyrights)
    elif format == Format.json:
        print_json(copyrights)
def print_html(copyrights):
    """Print the notices as an HTML table, one row per notice.

    Each row lists the files carrying the notice in the first cell and the
    notice text in the second. Notices and files are printed in sorted order.

    Args:
        copyrights: Mapping of notice text to a list of file paths.
    """
    print('<html>')
    print("""
<head>
  <style>
    table {
      font-family: monospace
    }

    table tr td {
      padding: 10px 10px 10px 10px
    }
  </style>
</head>
""")
    print('<body>')
    print('<table border="1" style="border-collapse:collapse">')
    for notice in sorted(copyrights.keys()):
        files = sorted(copyrights[notice])

        print('<tr>')
        print('<td>')
        print('<ul>')
        for file_path in files:
            # Paths come from the scanned tree; escape markup characters.
            print('<li>%s</li>' % html.escape(file_path))
        print('</ul>')
        print('</td>')

        print('<td>')
        # Escape first (notices often contain "<user@host>"), then turn line
        # breaks into <br> so the inserted tags stay intact.
        print('<p>%s</p>' % html.escape(notice).replace('\n', '<br>'))
        print('</td>')

        print('</tr>')

    print('</table>')
    print('</body></html>')
def print_notice(copyrights, print_file):
    """Print the notices as plain text, separated by a line of 67 dashes.

    Args:
        copyrights: Mapping of notice text to a list of file paths.
        print_file: When true, the sorted file paths are printed before each
            notice, followed by an empty line.
    """
    separator = "-" * 67

    # print the copyright in sorted order for stable output.
    for notice in sorted(copyrights.keys()):
        if print_file:
            print("\n".join(sorted(copyrights[notice])))
            print()
        print(notice)
        print()
        print(separator)
        print()
def print_json(copyrights):
    """Print the notices as one JSON object mapping notice text to files.

    Keys and file lists are emitted in sorted order so the output is stable,
    in line with the other printers.

    Args:
        copyrights: Mapping of notice text to a list of file paths.
    """
    stable = {notice: sorted(files) for notice, files in copyrights.items()}
    print(json.dumps(stable, sort_keys=True))
class Format(Enum):
    """Output formats selectable with the --format command line option."""

    notice = 'notice'
    notice_with_filename = 'notice_with_filename'
    html = 'html'
    json = 'json'

    def __str__(self):
        # Show the bare value so argparse lists the choices as plain words.
        return self.value
def main():
    """Parse the command line and run the notice collection."""
    parser = argparse.ArgumentParser(description="Collect notice headers.")
    parser.add_argument("--format", dest="format", type=Format, choices=list(Format),
                        default=Format.notice,
                        help="output format of the collected notices")
    parser.add_argument("--target", dest="target", action='store',
                        required=True, help="target directory to collect notice headers")
    res = parser.parse_args()
    do_check(res.target, res.format)
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()