#!/bin/sh # -------------------------------------------------------------------------- # This is the script to create the Unicode chars property table. # It expects a UnicodeData file (http://www.unicode.org/Public/UNIDATA/UnicodeData.txt) # on standard input, and produces Erlang code on its standard output. This Erlang code # should be included into another module as it does not contain module header # nor export declarations. # # Written by Dimitry Golubovsky (dimitry@golubovsky.org) as part of basic # Unicode support for Yhc Core Erlang back-end. # # ------------------------------------------------------------------------- # Output the file header echo "\ %% ----------------------------------------------------------------------------- %% This is an automatically generated file: DO NOT EDIT. %% Generated by $0 on `date` %% ----------------------------------------------------------------------------- " # Convert the file to the C table awk ' BEGIN { FS = ";" num_props = 0 num_blocks = 0 first_code = -1 digits = "0123456789ABCDEF" for (i=0; i<16; i++) hex[substr(digits,i+1,1)] = i } function readhex(a) { l = length(a) acc = 0 for (i=1; i<=l; i++) acc = acc*16+hex[substr(a,i,1)] return acc } function endblock() { if (first_code >= 0) { blocks[num_blocks] = block_start ", " (next_code-first_code) ", prop" props[block_prop] "()" num_blocks++ } block_start = hex_code first_code = this_code block_prop = prop } { hex_code = "16#" $1 this_code = readhex($1) name = $2 cat = $3 updist = $13 == "" ? 0 : readhex($13) - this_code lowdist = $14 == "" ? 0 : readhex($14) - this_code ttldist = $15 == "" ? 0 : readhex($15) - this_code prop = "\47GENCAT_" cat "\47, " updist ", " lowdist ", " ttldist if (props[prop] == "") props[prop] = num_props++ if (index(name, "Last>") == 0 && (this_code != next_code || prop != block_prop)) endblock() next_code = this_code+1 } END { endblock() print "-define (NUM_BLOCKS, " num_blocks ")." print "" for (p in props) props_inv[props[p]] = p for (i=0; i { " props_inv[i] " }.\n" print "" print "char_block () -> {" for (i=0; i