72 lines
1.9 KiB
Bash
Executable File
72 lines
1.9 KiB
Bash
Executable File
#!/bin/sh
|
|
# Copyright 2015 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
# References:
|
|
# https://encoding.spec.whatwg.org/#euc-kr
|
|
|
|
# This script downloads the following file.
|
|
# https://encoding.spec.whatwg.org/index-euc-kr.txt
|
|
|
|
# Print the fixed header of the generated .ucm file: the ICU copyright
# banner, the converter properties, the MBCS state table and the opening
# CHARMAP line.
#
# Arguments: none
# Outputs:   the header text on stdout
#
# Defined with the POSIX "name() { ... }" form so the script also parses
# under a strict /bin/sh; the "function" keyword is a bash/ksh extension.
preamble() {
  # The delimiter is quoted so the text (which contains backslashes such as
  # \x3F) is emitted literally, with no shell expansion applied to it.
  cat <<'PREAMBLE'
# ***************************************************************************
# *
# * Copyright (C) 1995-2015, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# * Generated per the algorithm for EUC-KR
# * described at http://encoding.spec.whatwg.org/#euc-kr
# *
# ***************************************************************************
<code_set_name> "euc-kr-html"
<mb_cur_max> 2
<mb_cur_min> 1
<uconv_class> "MBCS"
<subchar> \x3F
<icu:charsetFamily> "ASCII"

# 81-fe in states 2 and 3 can be tigher and a1-fe, but
# to be compliant to HTML5 spec, it should be 81-fe.
<icu:state> 0-7f, 81-c5:1, c6:2, c7-fe:3
<icu:state> 41-5a, 61-7a, 81-fe
<icu:state> 41-52, 81-fe
<icu:state> 81-fe

CHARMAP
PREAMBLE
}
|
|
|
|
# Print the single-byte ASCII part of the charmap: one line per code point
# 0..127 in the form "<UXXXX> \xXX |0".
#
# Arguments: none
# Outputs:   128 charmap lines on stdout
#
# The body is a subshell "( ... )" so the loop counter does not leak into
# the caller, and the loop uses shell arithmetic instead of the non-POSIX
# seq utility.
ascii() (
  i=0
  while [ "$i" -le 127 ]; do
    # In the format string, "\\x" yields a literal backslash followed by "x".
    printf '<U%04X> \\x%02X |0\n' "$i" "$i"
    i=$((i + 1))
  done
)
|
|
|
|
|
|
# Convert the WHATWG EUC-KR index into charmap lines.
#
# Every line of the index that is neither empty nor a "#" comment is read as
# "<pointer> 0x<UCS> ...". The two-byte sequence is derived from the pointer:
#   lead  = pointer / 190 (integer part) + 0x81
#   trail = pointer % 190 + 0x41
# and the line "<UXXXX> \xLL\xTT |0" is written for it.
#
# Arguments: $1 - path of the index file (optional, default index-euc-kr.txt)
# Outputs:   one charmap line per index entry on stdout
euckr() {
  # Decimal constants are used inside awk because hexadecimal literals are
  # not part of POSIX awk, and int() makes the truncation of the quotient
  # explicit instead of relying on how printf %X treats a fractional value.
  awk '
    !/^#/ && !/^$/ {
      pointer = $1
      ucs = substr($2, 3)               # strip the leading "0x"
      lead = int(pointer / 190) + 129   # 129 == 0x81
      trail = pointer % 190 + 65        # 65 == 0x41
      tag = 0
      printf("<U%4s> \\x%02X\\x%02X |%d\n", ucs, lead, trail, tag)
    }
  ' "${1:-index-euc-kr.txt}"
}
|
|
|
|
# Print every multi-byte charmap line in index order (not yet sorted or
# de-duplicated). It simply delegates to euckr.
#
# Arguments: none
# Outputs:   whatever euckr writes to stdout
#
# Uses the POSIX function form; "function" is not available in every /bin/sh.
unsorted_table() {
  euckr
}
|
|
|
|
# Download the index, then write the complete .ucm file to stdout.
# Stop right away if the download fails: continuing would produce a charmap
# that is missing its multi-byte section or is built from a stale file.
wget -N -r -nd https://encoding.spec.whatwg.org/index-euc-kr.txt || {
  echo 'error: failed to download index-euc-kr.txt' >&2
  exit 1
}
preamble
ascii
# Byte-wise collation keeps the generated order independent of the caller's
# locale; -u drops duplicate lines in the same pass.
unsorted_table | LC_ALL=C sort -u
echo 'END CHARMAP'
|