tools: support full-icu by default
Instead of an English-only icudt64l.dat in the repo, we now have icudt64l.dat.bz2 with all locales. - updated READMEs and docs - shrinker now copies source, and compresses (bzip2) the ICU data file - configure expects deps/icu-small to be full ICU with a full compressed data file Fixes: https://github.com/nodejs/node/issues/19214 Co-Authored-By: Richard Lau <riclau@uk.ibm.com> Co-Authored-By: Jan Olaf Krems <jan.krems@gmail.com> Co-Authored-By: James M Snell <jasnell@gmail.com> PR-URL: https://github.com/nodejs/node/pull/29522 Reviewed-By: Jan Krems <jan.krems@gmail.com> Reviewed-By: Jiawen Geng <technicalcute@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com> Reviewed-By: Michaël Zasso <targos@protonmail.com>
This commit is contained in:
parent
a71fb978a4
commit
1a25e901b7
49
BUILDING.md
49
BUILDING.md
@ -35,21 +35,23 @@ file a new issue.
|
||||
* [Building Node.js](#building-nodejs-1)
|
||||
* [Android/Android-based devices (e.g. Firefox OS)](#androidandroid-based-devices-eg-firefox-os)
|
||||
* [`Intl` (ECMA-402) support](#intl-ecma-402-support)
|
||||
* [Default: `small-icu` (English only) support](#default-small-icu-english-only-support)
|
||||
* [Build with full ICU support (all locales supported by ICU)](#build-with-full-icu-support-all-locales-supported-by-icu)
|
||||
* [Unix/macOS](#unixmacos)
|
||||
* [Windows](#windows-1)
|
||||
* [Building without Intl support](#building-without-intl-support)
|
||||
* [Trimmed: `small-icu` (English only) support](#trimmed-small-icu-english-only-support)
|
||||
* [Unix/macOS](#unixmacos-1)
|
||||
* [Windows](#windows-2)
|
||||
* [Use existing installed ICU (Unix/macOS only)](#use-existing-installed-icu-unixmacos-only)
|
||||
* [Build with a specific ICU](#build-with-a-specific-icu)
|
||||
* [Building without Intl support](#building-without-intl-support)
|
||||
* [Unix/macOS](#unixmacos-2)
|
||||
* [Windows](#windows-3)
|
||||
* [Building Node.js with FIPS-compliant OpenSSL](#building-nodejs-with-fips-compliant-openssl)
|
||||
* [Building Node.js with external core modules](#building-nodejs-with-external-core-modules)
|
||||
* [Use existing installed ICU (Unix/macOS only)](#use-existing-installed-icu-unixmacos-only)
|
||||
* [Build with a specific ICU](#build-with-a-specific-icu)
|
||||
* [Unix/macOS](#unixmacos-3)
|
||||
* [Windows](#windows-4)
|
||||
* [Building Node.js with FIPS-compliant OpenSSL](#building-nodejs-with-fips-compliant-openssl)
|
||||
* [Building Node.js with external core modules](#building-nodejs-with-external-core-modules)
|
||||
* [Unix/macOS](#unixmacos-4)
|
||||
* [Windows](#windows-5)
|
||||
* [Note for downstream distributors of Node.js](#note-for-downstream-distributors-of-nodejs)
|
||||
|
||||
## Supported platforms
|
||||
@ -598,31 +600,40 @@ $ make
|
||||
## `Intl` (ECMA-402) support
|
||||
|
||||
[Intl](https://github.com/nodejs/node/blob/master/doc/api/intl.md) support is
|
||||
enabled by default, with English data only.
|
||||
|
||||
### Default: `small-icu` (English only) support
|
||||
|
||||
By default, only English data is included, but
|
||||
the full `Intl` (ECMA-402) APIs are available. It does not need to download
|
||||
any dependencies to function. You can add full
|
||||
data at runtime.
|
||||
enabled by default.
|
||||
|
||||
### Build with full ICU support (all locales supported by ICU)
|
||||
|
||||
With the `--download=all` option, this may download ICU if you don't have an
|
||||
ICU in `deps/icu`. (The embedded `small-icu` included in the default
|
||||
Node.js source does not include all locales.)
|
||||
This is the default option.
|
||||
|
||||
#### Unix/macOS
|
||||
|
||||
```console
|
||||
$ ./configure --with-intl=full-icu --download=all
|
||||
$ ./configure --with-intl=full-icu
|
||||
```
|
||||
|
||||
#### Windows
|
||||
|
||||
```console
|
||||
> .\vcbuild full-icu download-all
|
||||
> .\vcbuild full-icu
|
||||
```
|
||||
|
||||
### Trimmed: `small-icu` (English only) support
|
||||
|
||||
In this configuration, only English data is included, but
|
||||
the full `Intl` (ECMA-402) APIs are available. It does not need to download
|
||||
any dependencies to function. You can add full data at runtime.
|
||||
|
||||
#### Unix/macOS
|
||||
|
||||
```console
|
||||
$ ./configure --with-intl=small-icu
|
||||
```
|
||||
|
||||
#### Windows
|
||||
|
||||
```console
|
||||
> .\vcbuild small-icu
|
||||
```
|
||||
|
||||
### Building without Intl support
|
||||
|
87
configure.py
87
configure.py
@ -11,6 +11,8 @@ import re
|
||||
import shlex
|
||||
import subprocess
|
||||
import shutil
|
||||
import bz2
|
||||
|
||||
from distutils.spawn import find_executable as which
|
||||
|
||||
# If not run from node/, cd to node/.
|
||||
@ -409,7 +411,7 @@ parser.add_option('--use-largepages-script-lld',
|
||||
intl_optgroup.add_option('--with-intl',
|
||||
action='store',
|
||||
dest='with_intl',
|
||||
default='small-icu',
|
||||
default='full-icu',
|
||||
choices=valid_intl_modes,
|
||||
help='Intl mode (valid choices: {0}) [default: %default]'.format(
|
||||
', '.join(valid_intl_modes)))
|
||||
@ -1399,7 +1401,8 @@ def configure_intl(o):
|
||||
icu_parent_path = 'deps'
|
||||
|
||||
# The full path to the ICU source directory. Should not include './'.
|
||||
icu_full_path = 'deps/icu'
|
||||
icu_deps_path = 'deps/icu'
|
||||
icu_full_path = icu_deps_path
|
||||
|
||||
# icu-tmp is used to download and unpack the ICU tarball.
|
||||
icu_tmp_path = os.path.join(icu_parent_path, 'icu-tmp')
|
||||
@ -1407,30 +1410,26 @@ def configure_intl(o):
|
||||
# canned ICU. see tools/icu/README.md to update.
|
||||
canned_icu_dir = 'deps/icu-small'
|
||||
|
||||
# use the README to verify what the canned ICU is
|
||||
canned_is_full = os.path.isfile(os.path.join(canned_icu_dir, 'README-FULL-ICU.txt'))
|
||||
canned_is_small = os.path.isfile(os.path.join(canned_icu_dir, 'README-SMALL-ICU.txt'))
|
||||
if canned_is_small:
|
||||
warn('Ignoring %s - in-repo small icu is no longer supported.' % canned_icu_dir)
|
||||
|
||||
# We can use 'deps/icu-small' - pre-canned ICU *iff*
|
||||
# - with_intl == small-icu (the default!)
|
||||
# - with_icu_locales == 'root,en' (the default!)
|
||||
# - deps/icu-small exists!
|
||||
# - canned_is_full AND
|
||||
# - with_icu_source is unset (i.e. no other ICU was specified)
|
||||
# (Note that this is the *DEFAULT CASE*.)
|
||||
#
|
||||
# This is *roughly* equivalent to
|
||||
# $ configure --with-intl=small-icu --with-icu-source=deps/icu-small
|
||||
# $ configure --with-intl=full-icu --with-icu-source=deps/icu-small
|
||||
# .. Except that we avoid copying icu-small over to deps/icu.
|
||||
# In this default case, deps/icu is ignored, although make clean will
|
||||
# still harmlessly remove deps/icu.
|
||||
|
||||
# are we using default locales?
|
||||
using_default_locales = ( options.with_icu_locales == icu_default_locales )
|
||||
|
||||
# make sure the canned ICU really exists
|
||||
canned_icu_available = os.path.isdir(canned_icu_dir)
|
||||
|
||||
if (o['variables']['icu_small'] == b(True)) and using_default_locales and (not with_icu_source) and canned_icu_available:
|
||||
if (not with_icu_source) and canned_is_full:
|
||||
# OK- we can use the canned ICU.
|
||||
icu_config['variables']['icu_small_canned'] = 1
|
||||
icu_full_path = canned_icu_dir
|
||||
|
||||
icu_config['variables']['icu_full_canned'] = 1
|
||||
# --with-icu-source processing
|
||||
# now, check that they didn't pass --with-icu-source=deps/icu
|
||||
elif with_icu_source and os.path.abspath(icu_full_path) == os.path.abspath(with_icu_source):
|
||||
@ -1508,29 +1507,40 @@ def configure_intl(o):
|
||||
icu_endianness = sys.byteorder[0]
|
||||
o['variables']['icu_ver_major'] = icu_ver_major
|
||||
o['variables']['icu_endianness'] = icu_endianness
|
||||
icu_data_file_l = 'icudt%s%s.dat' % (icu_ver_major, 'l')
|
||||
icu_data_file_l = 'icudt%s%s.dat' % (icu_ver_major, 'l') # LE filename
|
||||
icu_data_file = 'icudt%s%s.dat' % (icu_ver_major, icu_endianness)
|
||||
# relative to configure
|
||||
icu_data_path = os.path.join(icu_full_path,
|
||||
'source/data/in',
|
||||
icu_data_file_l)
|
||||
icu_data_file_l) # LE
|
||||
compressed_data = '%s.bz2' % (icu_data_path)
|
||||
if not os.path.isfile(icu_data_path) and os.path.isfile(compressed_data):
|
||||
# unpack. deps/icu-tmp is a temporary path
|
||||
if os.path.isdir(icu_tmp_path):
|
||||
shutil.rmtree(icu_tmp_path)
|
||||
os.mkdir(icu_tmp_path)
|
||||
icu_data_path = os.path.join(icu_tmp_path, icu_data_file_l)
|
||||
with open(icu_data_path, 'wb') as outf:
|
||||
with bz2.BZ2File(compressed_data, 'rb') as inf:
|
||||
shutil.copyfileobj(inf, outf)
|
||||
# Now, proceed..
|
||||
|
||||
# relative to dep..
|
||||
icu_data_in = os.path.join('..','..', icu_full_path, 'source/data/in', icu_data_file_l)
|
||||
icu_data_in = os.path.join('..','..', icu_data_path)
|
||||
if not os.path.isfile(icu_data_path) and icu_endianness != 'l':
|
||||
# use host endianness
|
||||
icu_data_path = os.path.join(icu_full_path,
|
||||
'source/data/in',
|
||||
icu_data_file)
|
||||
# relative to dep..
|
||||
icu_data_in = os.path.join('..', icu_full_path, 'source/data/in',
|
||||
icu_data_file)
|
||||
# this is the input '.dat' file to use .. icudt*.dat
|
||||
# may be little-endian if from an icu-project.org tarball
|
||||
o['variables']['icu_data_in'] = icu_data_in
|
||||
icu_data_file) # will be generated
|
||||
if not os.path.isfile(icu_data_path):
|
||||
# .. and we're not about to build it from .gyp!
|
||||
error('''ICU prebuilt data file %s does not exist.
|
||||
See the README.md.''' % icu_data_path)
|
||||
|
||||
# this is the input '.dat' file to use .. icudt*.dat
|
||||
# may be little-endian if from an icu-project.org tarball
|
||||
o['variables']['icu_data_in'] = icu_data_in
|
||||
|
||||
# map from variable name to subdirs
|
||||
icu_src = {
|
||||
'stubdata': 'stubdata',
|
||||
@ -1547,6 +1557,31 @@ def configure_intl(o):
|
||||
var = 'icu_src_%s' % i
|
||||
path = '../../%s/source/%s' % (icu_full_path, icu_src[i])
|
||||
icu_config['variables'][var] = glob_to_var('tools/icu', path, 'patches/%s/source/%s' % (icu_ver_major, icu_src[i]) )
|
||||
# calculate platform-specific genccode args
|
||||
# print("platform %s, flavor %s" % (sys.platform, flavor))
|
||||
# if sys.platform == 'darwin':
|
||||
# shlib_suffix = '%s.dylib'
|
||||
# elif sys.platform.startswith('aix'):
|
||||
# shlib_suffix = '%s.a'
|
||||
# else:
|
||||
# shlib_suffix = 'so.%s'
|
||||
if flavor == 'win':
|
||||
icu_config['variables']['icu_asm_ext'] = 'obj'
|
||||
icu_config['variables']['icu_asm_opts'] = [ '-o ' ]
|
||||
elif with_intl == 'small-icu' or options.cross_compiling:
|
||||
icu_config['variables']['icu_asm_ext'] = 'c'
|
||||
icu_config['variables']['icu_asm_opts'] = []
|
||||
elif flavor == 'mac':
|
||||
icu_config['variables']['icu_asm_ext'] = 'S'
|
||||
icu_config['variables']['icu_asm_opts'] = [ '-a', 'gcc-darwin' ]
|
||||
elif sys.platform.startswith('aix'):
|
||||
icu_config['variables']['icu_asm_ext'] = 'S'
|
||||
icu_config['variables']['icu_asm_opts'] = [ '-a', 'xlc' ]
|
||||
else:
|
||||
# assume GCC-compatible asm is OK
|
||||
icu_config['variables']['icu_asm_ext'] = 'S'
|
||||
icu_config['variables']['icu_asm_opts'] = [ '-a', 'gcc' ]
|
||||
|
||||
# write updated icu_config.gypi with a bunch of paths
|
||||
write(icu_config_name, do_not_edit +
|
||||
pprint.pformat(icu_config, indent=2) + '\n')
|
||||
|
8
deps/icu-small/README-FULL-ICU.txt
vendored
Normal file
8
deps/icu-small/README-FULL-ICU.txt
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
ICU sources - auto generated by shrink-icu-src.py
|
||||
|
||||
This directory contains the ICU subset used by --with-intl=full-icu
|
||||
It is a strict subset of ICU 64 source files with the following exception(s):
|
||||
* deps/icu-small/source/data/in/icudt64l.dat.bz2 : compressed data file
|
||||
|
||||
|
||||
To rebuild this directory, see ../../tools/icu/README.md
|
8
deps/icu-small/README-SMALL-ICU.txt
vendored
8
deps/icu-small/README-SMALL-ICU.txt
vendored
@ -1,8 +0,0 @@
|
||||
Small ICU sources - auto generated by shrink-icu-src.py
|
||||
|
||||
This directory contains the ICU subset used by --with-intl=small-icu (the default)
|
||||
It is a strict subset of ICU 64 source files with the following exception(s):
|
||||
* deps/icu-small/source/data/in/icudt64l.dat : Reduced-size data file
|
||||
|
||||
|
||||
To rebuild this directory, see ../../tools/icu/README.md
|
BIN
deps/icu-small/source/data/in/icudt64l.dat
vendored
BIN
deps/icu-small/source/data/in/icudt64l.dat
vendored
Binary file not shown.
BIN
deps/icu-small/source/data/in/icudt64l.dat.bz2
vendored
Normal file
BIN
deps/icu-small/source/data/in/icudt64l.dat.bz2
vendored
Normal file
Binary file not shown.
@ -23,11 +23,9 @@ programs. Some of them are:
|
||||
* [`RegExp` Unicode Property Escapes][]
|
||||
|
||||
Node.js (and its underlying V8 engine) uses [ICU][] to implement these features
|
||||
in native C/C++ code. However, some of them require a very large ICU data file
|
||||
in order to support all locales of the world. Because it is expected that most
|
||||
Node.js users will make use of only a small portion of ICU functionality, only
|
||||
a subset of the full ICU data set is provided by Node.js by default. Several
|
||||
options are provided for customizing and expanding the ICU data set either when
|
||||
in native C/C++ code. The full ICU data set is provided by Node.js by default.
|
||||
However, due to the size of the ICU data file, several
|
||||
options are provided for customizing the ICU data set either when
|
||||
building or running Node.js.
|
||||
|
||||
## Options for building Node.js
|
||||
@ -38,8 +36,8 @@ in [BUILDING.md][].
|
||||
|
||||
* `--with-intl=none`/`--without-intl`
|
||||
* `--with-intl=system-icu`
|
||||
* `--with-intl=small-icu` (default)
|
||||
* `--with-intl=full-icu`
|
||||
* `--with-intl=small-icu`
|
||||
* `--with-intl=full-icu` (default)
|
||||
|
||||
An overview of available Node.js and JavaScript features for each `configure`
|
||||
option:
|
||||
@ -66,8 +64,8 @@ operation is identical to that of `Date.prototype.toString()`.
|
||||
|
||||
### Disable all internationalization features (`none`)
|
||||
|
||||
If this option is chosen, most internationalization features mentioned above
|
||||
will be **unavailable** in the resulting `node` binary.
|
||||
If this option is chosen, ICU is disabled and most internationalization
|
||||
features mentioned above will be **unavailable** in the resulting `node` binary.
|
||||
|
||||
### Build with a pre-installed ICU (`system-icu`)
|
||||
|
||||
@ -106,9 +104,7 @@ console.log(spanish.format(january));
|
||||
// Should print "enero"
|
||||
```
|
||||
|
||||
This mode provides a good balance between features and binary size, and it is
|
||||
the default behavior if no `--with-intl` flag is passed. The official binaries
|
||||
are also built in this mode.
|
||||
This mode provides a balance between features and binary size.
|
||||
|
||||
#### Providing ICU data at runtime
|
||||
|
||||
@ -149,8 +145,9 @@ enable full `Intl` support.
|
||||
|
||||
This option makes the resulting binary link against ICU statically and include
|
||||
a full set of ICU data. A binary created this way has no further external
|
||||
dependencies and supports all locales, but might be rather large. See
|
||||
[BUILDING.md][BUILDING.md#full-icu] on how to compile a binary using this mode.
|
||||
dependencies and supports all locales, but might be rather large. This is
|
||||
the default behavior if no `--with-intl` flag is passed. The official binaries
|
||||
are also built in this mode.
|
||||
|
||||
## Detecting internationalization support
|
||||
|
||||
@ -205,7 +202,6 @@ to be helpful:
|
||||
[`String.prototype.toUpperCase()`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/toUpperCase
|
||||
[`require('buffer').transcode()`]: buffer.html#buffer_buffer_transcode_source_fromenc_toenc
|
||||
[`require('util').TextDecoder`]: util.html#util_class_util_textdecoder
|
||||
[BUILDING.md#full-icu]: https://github.com/nodejs/node/blob/master/BUILDING.md#build-with-full-icu-support-all-locales-supported-by-icu
|
||||
[BUILDING.md]: https://github.com/nodejs/node/blob/master/BUILDING.md
|
||||
[ECMA-262]: https://tc39.github.io/ecma262/
|
||||
[ECMA-402]: https://tc39.github.io/ecma402/
|
||||
|
@ -932,26 +932,9 @@ Per the [WHATWG Encoding Standard][], the encodings supported by the
|
||||
one or more aliases may be used.
|
||||
|
||||
Different Node.js build configurations support different sets of encodings.
|
||||
While a very basic set of encodings is supported even on Node.js builds without
|
||||
ICU enabled, support for some encodings is provided only when Node.js is built
|
||||
with ICU and using the full ICU data (see [Internationalization][]).
|
||||
(see [Internationalization][])
|
||||
|
||||
#### Encodings Supported Without ICU
|
||||
|
||||
| Encoding | Aliases |
|
||||
| ----------- | --------------------------------- |
|
||||
| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` |
|
||||
| `'utf-16le'` | `'utf-16'` |
|
||||
|
||||
#### Encodings Supported by Default (With ICU)
|
||||
|
||||
| Encoding | Aliases |
|
||||
| ----------- | --------------------------------- |
|
||||
| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` |
|
||||
| `'utf-16le'` | `'utf-16'` |
|
||||
| `'utf-16be'` | |
|
||||
|
||||
#### Encodings Requiring Full ICU Data
|
||||
#### Encodings Supported by Default (With Full ICU Data)
|
||||
|
||||
| Encoding | Aliases |
|
||||
| ----------------- | -------------------------------- |
|
||||
@ -990,6 +973,21 @@ with ICU and using the full ICU data (see [Internationalization][]).
|
||||
| `'shift_jis'` | `'csshiftjis'`, `'ms932'`, `'ms_kanji'`, `'shift-jis'`, `'sjis'`, `'windows-31j'`, `'x-sjis'` |
|
||||
| `'euc-kr'` | `'cseuckr'`, `'csksc56011987'`, `'iso-ir-149'`, `'korean'`, `'ks_c_5601-1987'`, `'ks_c_5601-1989'`, `'ksc5601'`, `'ksc_5601'`, `'windows-949'` |
|
||||
|
||||
#### Encodings Supported when Node.js is built with the `small-icu` option
|
||||
|
||||
| Encoding | Aliases |
|
||||
| ----------- | --------------------------------- |
|
||||
| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` |
|
||||
| `'utf-16le'` | `'utf-16'` |
|
||||
| `'utf-16be'` | |
|
||||
|
||||
#### Encodings Supported when ICU is disabled
|
||||
|
||||
| Encoding | Aliases |
|
||||
| ----------- | --------------------------------- |
|
||||
| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` |
|
||||
| `'utf-16le'` | `'utf-16'` |
|
||||
|
||||
The `'iso-8859-16'` encoding listed in the [WHATWG Encoding Standard][]
|
||||
is not supported.
|
||||
|
||||
@ -1005,9 +1003,9 @@ changes:
|
||||
* `encoding` {string} Identifies the `encoding` that this `TextDecoder` instance
|
||||
supports. **Default:** `'utf-8'`.
|
||||
* `options` {Object}
|
||||
* `fatal` {boolean} `true` if decoding failures are fatal. This option is only
|
||||
supported when ICU is enabled (see [Internationalization][]). **Default:**
|
||||
`false`.
|
||||
* `fatal` {boolean} `true` if decoding failures are fatal.
|
||||
This option is not supported when ICU is disabled
|
||||
(see [Internationalization][]). **Default:** `false`.
|
||||
* `ignoreBOM` {boolean} When `true`, the `TextDecoder` will include the byte
|
||||
order mark in the decoded result. When `false`, the byte order mark will
|
||||
be removed from the output. This option is only used when `encoding` is
|
||||
|
@ -27,7 +27,7 @@ internationalization functionality.
|
||||
|
||||
```shell
|
||||
./configure \
|
||||
--with-intl=small-icu \
|
||||
--with-intl=full-icu \
|
||||
--with-icu-source=http://download.icu-project.org/files/icu4c/58.1/icu4c-58_1-src.tgz
|
||||
make
|
||||
```
|
||||
@ -54,7 +54,7 @@ Also running
|
||||
new Intl.DateTimeFormat('es', {month: 'long'}).format(new Date(9E8));
|
||||
```
|
||||
|
||||
…Should return `January` not `enero`.
|
||||
…Should return `enero` not `January`.
|
||||
|
||||
* Now, copy `deps/icu` over to `deps/icu-small`
|
||||
|
||||
@ -94,12 +94,12 @@ tools/license-builder.sh
|
||||
|
||||
* Update the URL and hash for the full ICU file in `tools/icu/current_ver.dep`.
|
||||
It should match the ICU URL used in the first step. When this is done, the
|
||||
following should build with full ICU.
|
||||
following should build with small ICU.
|
||||
|
||||
```shell
|
||||
# clean up
|
||||
rm -rf out deps/icu deps/icu4c*
|
||||
./configure --with-intl=full-icu --download=all
|
||||
./configure --with-intl=small-icu --download=all
|
||||
make
|
||||
make test-ci
|
||||
```
|
||||
|
@ -212,16 +212,17 @@
|
||||
'conditions': [
|
||||
[ 'icu_small == "false"', { # and OS=win
|
||||
# full data - just build the full data file, then we are done.
|
||||
'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ],
|
||||
'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ],
|
||||
'dependencies': [ 'genccode#host' ],
|
||||
'actions': [
|
||||
{
|
||||
'action_name': 'icudata',
|
||||
'msvs_quote_cmd': 0,
|
||||
'inputs': [ '<(icu_data_in)' ],
|
||||
'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ],
|
||||
'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ],
|
||||
# on Windows, we can go directly to .obj file (-o) option.
|
||||
'action': [ '<(PRODUCT_DIR)/genccode',
|
||||
'-o',
|
||||
'<@(icu_asm_opts)', # -o
|
||||
'-d', '<(SHARED_INTERMEDIATE_DIR)',
|
||||
'-n', 'icudata',
|
||||
'-e', 'icudt<(icu_ver_major)',
|
||||
@ -256,9 +257,9 @@
|
||||
'action_name': 'genccode',
|
||||
'msvs_quote_cmd': 0,
|
||||
'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ],
|
||||
'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ],
|
||||
'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ],
|
||||
'action': [ '<(PRODUCT_DIR)/genccode',
|
||||
'-o',
|
||||
'<@(icu_asm_opts)', # -o
|
||||
'-d', '<(SHARED_INTERMEDIATE_DIR)/',
|
||||
'-n', 'icudata',
|
||||
'-e', 'icusmdt<(icu_ver_major)',
|
||||
@ -266,20 +267,20 @@
|
||||
},
|
||||
],
|
||||
# This file contains the small ICU data.
|
||||
'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ],
|
||||
'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.<(icu_asm_ext)' ],
|
||||
} ] ], #end of OS==win and icu_small == true
|
||||
}, { # OS != win
|
||||
'conditions': [
|
||||
[ 'icu_small == "false"', {
|
||||
# full data - just build the full data file, then we are done.
|
||||
'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ],
|
||||
# full data - no trim needed
|
||||
'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.<(icu_asm_ext)' ],
|
||||
'dependencies': [ 'genccode#host', 'icupkg#host', 'icu_implementation#host', 'icu_uconfig' ],
|
||||
'include_dirs': [
|
||||
'<(icu_path)/source/common',
|
||||
],
|
||||
'actions': [
|
||||
{
|
||||
# Swap endianness (if needed), or at least copy the file
|
||||
# Copy the .dat file, swapping endianness if needed.
|
||||
'action_name': 'icupkg',
|
||||
'inputs': [ '<(icu_data_in)' ],
|
||||
'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ],
|
||||
@ -290,7 +291,7 @@
|
||||
],
|
||||
},
|
||||
{
|
||||
# Rename without the endianness marker
|
||||
# Rename without the endianness marker (icudt64l.dat -> icudt64.dat)
|
||||
'action_name': 'copy',
|
||||
'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ],
|
||||
'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ],
|
||||
@ -300,12 +301,14 @@
|
||||
],
|
||||
},
|
||||
{
|
||||
# convert full ICU data file to .c, or .S, etc.
|
||||
'action_name': 'icudata',
|
||||
'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ],
|
||||
'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ],
|
||||
'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.<(icu_asm_ext)' ],
|
||||
'action': [ '<(PRODUCT_DIR)/genccode',
|
||||
'-e', 'icudt<(icu_ver_major)',
|
||||
'-d', '<(SHARED_INTERMEDIATE_DIR)',
|
||||
'<@(icu_asm_opts)',
|
||||
'-f', 'icudt<(icu_ver_major)_dat',
|
||||
'<@(_inputs)' ],
|
||||
},
|
||||
@ -318,7 +321,8 @@
|
||||
'export_dependent_settings': [ 'icustubdata' ],
|
||||
'actions': [
|
||||
{
|
||||
# trim down ICU
|
||||
# Trim down ICU.
|
||||
# Note that icupkg is invoked automatically, swapping endianness if needed.
|
||||
'action_name': 'icutrim',
|
||||
'inputs': [ '<(icu_data_in)', 'icu_small.json' ],
|
||||
'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ],
|
||||
@ -333,7 +337,7 @@
|
||||
'-v',
|
||||
'-L', '<(icu_locales)'],
|
||||
}, {
|
||||
# rename to get the final entrypoint name right
|
||||
# rename to get the final entrypoint name right (icudt64l.dat -> icusmdt64.dat)
|
||||
'action_name': 'rename',
|
||||
'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ],
|
||||
'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ],
|
||||
@ -342,17 +346,18 @@
|
||||
'<@(_outputs)',
|
||||
],
|
||||
}, {
|
||||
# build final .dat -> .obj
|
||||
# For icu-small, always use .c, don't try to use .S, etc.
|
||||
'action_name': 'genccode',
|
||||
'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ],
|
||||
'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ],
|
||||
'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.<(icu_asm_ext)' ],
|
||||
'action': [ '<(PRODUCT_DIR)/genccode',
|
||||
'<@(icu_asm_opts)',
|
||||
'-d', '<(SHARED_INTERMEDIATE_DIR)',
|
||||
'<@(_inputs)' ],
|
||||
},
|
||||
],
|
||||
# This file contains the small ICU data
|
||||
'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ],
|
||||
'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.<(icu_asm_ext)' ],
|
||||
# for umachine.h
|
||||
'include_dirs': [
|
||||
'<(icu_path)/source/common',
|
||||
|
@ -5,14 +5,15 @@ import os
|
||||
import re
|
||||
import sys
|
||||
import shutil
|
||||
import bz2
|
||||
|
||||
parser = optparse.OptionParser()
|
||||
|
||||
parser.add_option('--icu-small',
|
||||
parser.add_option('--icudst',
|
||||
action='store',
|
||||
dest='icusmall',
|
||||
dest='icudst',
|
||||
default='deps/icu-small',
|
||||
help='path to target ICU directory to shrink. Will be deleted.')
|
||||
help='path to target ICU directory. Will be deleted.')
|
||||
|
||||
parser.add_option('--icu-src',
|
||||
action='store',
|
||||
@ -26,18 +27,26 @@ parser.add_option('--icutmp',
|
||||
default='out/Release/obj/gen/icutmp',
|
||||
help='path to icutmp dir.')
|
||||
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
if os.path.isdir(options.icusmall):
|
||||
print('Deleting existing icusmall %s' % (options.icusmall))
|
||||
shutil.rmtree(options.icusmall)
|
||||
if os.path.isdir(options.icudst):
|
||||
print('Deleting existing icudst %s' % (options.icudst))
|
||||
shutil.rmtree(options.icudst)
|
||||
|
||||
if not os.path.isdir(options.icusrc):
|
||||
print('Missing source ICU dir --icusrc=%s' % (options.icusrc))
|
||||
sys.exit(1)
|
||||
|
||||
# compression stuff. Keep the suffix and the compression function in sync.
|
||||
compression_suffix = '.bz2'
|
||||
def compress_data(infp, outfp):
|
||||
with open(infp, 'rb') as inf:
|
||||
with bz2.BZ2File(outfp, 'wb') as outf:
|
||||
shutil.copyfileobj(inf, outf)
|
||||
|
||||
def print_size(fn):
|
||||
size = (os.stat(fn).st_size) / 1024000
|
||||
print('%dM\t%s' % (size, fn))
|
||||
|
||||
ignore_regex = re.compile('^.*\.(vcxproj|filters|nrm|icu|dat|xml|txt|ac|guess|m4|in|sub|py|mak)$')
|
||||
|
||||
@ -90,36 +99,41 @@ def icu_info(icu_full_path):
|
||||
return (icu_ver_major, icu_endianness)
|
||||
|
||||
(icu_ver_major, icu_endianness) = icu_info(options.icusrc)
|
||||
print("icudt%s%s" % (icu_ver_major, icu_endianness))
|
||||
print("Data file root: icudt%s%s" % (icu_ver_major, icu_endianness))
|
||||
dst_datafile = os.path.join(options.icudst, "source","data","in", "icudt%s%s.dat" % (icu_ver_major, icu_endianness))
|
||||
|
||||
src_datafile = os.path.join(options.icutmp, "icusmdt%s.dat" % (icu_ver_major))
|
||||
dst_datafile = os.path.join(options.icusmall, "source","data","in", "icudt%s%s.dat" % (icu_ver_major, icu_endianness))
|
||||
src_datafile = os.path.join(options.icusrc, "source/data/in/icudt%sl.dat" % (icu_ver_major))
|
||||
dst_cmp_datafile = "%s%s" % (dst_datafile, compression_suffix)
|
||||
|
||||
if not os.path.isfile(src_datafile):
|
||||
print("Could not find source datafile %s - did you build small-icu node?" % src_datafile)
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("will use small datafile %s" % (src_datafile))
|
||||
print('%s --> %s' % (options.icusrc, options.icusmall))
|
||||
shutil.copytree(options.icusrc, options.icusmall, ignore=icu_ignore)
|
||||
print('%s --> %s' % (src_datafile, dst_datafile))
|
||||
print("Error: icu data file not found: %s" % src_datafile)
|
||||
exit(1)
|
||||
|
||||
print("will use datafile %s" % (src_datafile))
|
||||
|
||||
print('%s --> %s' % (options.icusrc, options.icudst))
|
||||
shutil.copytree(options.icusrc, options.icudst, ignore=icu_ignore)
|
||||
|
||||
# now, make the data dir (since we ignored it)
|
||||
os.mkdir(os.path.join(os.path.join(options.icusmall, "source", "data")))
|
||||
os.mkdir(os.path.join(os.path.join(options.icusmall, "source", "data", "in")))
|
||||
icudst_data = os.path.join(options.icudst, "source", "data")
|
||||
icudst_in = os.path.join(icudst_data, "in")
|
||||
os.mkdir(icudst_data)
|
||||
os.mkdir(icudst_in)
|
||||
|
||||
# OK, now copy the data file
|
||||
shutil.copy(src_datafile, dst_datafile)
|
||||
print_size(src_datafile)
|
||||
|
||||
print('%s --compress-> %s' % (src_datafile, dst_cmp_datafile))
|
||||
compress_data(src_datafile, dst_cmp_datafile)
|
||||
print_size(dst_cmp_datafile)
|
||||
readme_name = os.path.join(options.icudst, "README-FULL-ICU.txt" )
|
||||
|
||||
# Now, print a short notice
|
||||
readme_name = os.path.join(options.icusmall, "README-SMALL-ICU.txt" )
|
||||
|
||||
fi = open(readme_name, 'wb')
|
||||
print("Small ICU sources - auto generated by shrink-icu-src.py", file=fi)
|
||||
print("ICU sources - auto generated by shrink-icu-src.py", file=fi)
|
||||
print("", file=fi)
|
||||
print("This directory contains the ICU subset used by --with-intl=small-icu (the default)", file=fi)
|
||||
print("This directory contains the ICU subset used by --with-intl=full-icu", file=fi)
|
||||
print("It is a strict subset of ICU %s source files with the following exception(s):" % (icu_ver_major), file=fi)
|
||||
print("* %s : Reduced-size data file" % (dst_datafile), file=fi)
|
||||
print("* %s : compressed data file" % (dst_cmp_datafile), file=fi)
|
||||
print("", file=fi)
|
||||
print("", file=fi)
|
||||
print("To rebuild this directory, see ../../tools/icu/README.md", file=fi)
|
||||
|
Loading…
x
Reference in New Issue
Block a user