Module:UCS

From Omniversalis

Documentation for this module may be created at Module:UCS/doc

--	┌─────────────────────────────────────────┐
--	│  Makes the table of UCS (Unicode) characters for a reference page    │
--	└─────────────────────────────────────────┘

--        G l o b a l   v a r i a b l e s .
-- File-level state shared by the routines below.  It is initialised once,
-- when the module chunk is executed, and mutated while the table is built.
local outbuff = { '{| class="wikitable"' }  -- a sequence of (output) strings; p.table joins it with "\n"
local outptr = 1   -- global pointer in outbuff: index of the string stored last
local base_codepoint = 32;  -- code point that the next emitted cell stands for (32 = U+0020)
local Block = " [[Basic Latin (Unicode block)|Basic Latin]]"  -- wikitext appended to the "U+xxxx:" row labels
local row_start = 1 -- usually, pointer to the last " |-" in outbuff

--        U t i l i t y   f u n c t i o n s   s t a r t   h e r e .
-- Append one string to the output buffer.
--   s : the wikitext fragment to store.
-- The write pointer `outptr` is advanced to the slot that received `s`.
function puts( s )
    local slot = outptr + 1
    outbuff[slot] = s
    outptr = slot
end

-- Terminate the table row that is currently being built.
--   NoC : number of columns a complete row occupies.
--   s   : message shown (in red) in a filler cell when the row is too short.
-- Nothing is emitted when no cell has been written since the last row
-- separator.  Otherwise a filler cell covering the missing columns is added
-- if needed, a " |-" separator is written and `row_start` is moved onto it.
function close_row( NoC, s )
    if ( outptr <= row_start ) then return end  -- the row is still empty

    local missing = row_start + NoC - outptr
    if ( missing > 0 ) then  -- may not happen with correct input data
        local span = ( missing > 1 ) and ( 'colspan='..missing..' ' ) or ''
        puts( ' | '..span..' style="color:red" |'..s )
    end

    puts(" |-")
    row_start = outptr
end


-- Turn a code point into the text placed in a table cell.
--   c : numeric code point.
-- Returns a decimal numeric character reference ("&#NN;") for the code
-- points listed below and the character itself (via mw.ustring.char) for
-- everything else.
function mkchar( c )
    local needs_reference =
        c < 36                          -- C0, space, !, ", #
        or c == 38                      -- &
        or c == 127                     -- DEL, and ( c < 160 ) (C1) pointless
        or ( c >= 91 and c <= 93 )      -- [ \ ]
        or ( c >= 123 and c <= 125 )    -- { | }

    if needs_reference then
        return '&#'..c..';'
    end
    return mw.ustring.char( c )
end

-- Value of the hexadecimal digit recognised most recently by is_hex().
local hh = 0;

-- Classify a byte as a hexadecimal digit.
--   c : byte value as returned by string.byte(), or nil when the caller has
--       indexed past the end of its string.
-- Returns the digit value (0..15) for 0-9, A-F and a-f, otherwise -1.
-- On success the digit value is also stored in `hh`; on failure `hh` keeps
-- its previous value.
function is_hex ( c )
    -- string.byte() yields nil beyond the end of the string; comparing nil
    -- with a number would raise an error, so treat it as "not a digit".
    if ( c == nil ) then return -1 end

    local digit
    if ( c >= 48 and c <= 57 ) then        -- 0–9
        digit = c - 48
    elseif ( c >= 65 and c <= 70 ) then    -- A–F
        digit = c - 55
    elseif ( c >= 97 and c <= 102 ) then   -- a–f
        digit = c - 87
    else
        return -1
    end

    hh = digit
    return digit
end

-- Read an unsigned hexadecimal number from a string.
--   s : the string to read from.
--   i : byte index of the first candidate digit.
-- Returns two values: the number read (0 when there is no digit at `i`) and
-- the index of the first byte that is not a hexadecimal digit, which is
-- #s + 1 when the digits run to the end of the string.
function get_hex ( s, i )
    local v = 0
    while ( true ) do
        local byte = string.byte( s, i )
        -- End of string: stop here instead of handing nil to is_hex().
        if ( byte == nil ) then break end
        local digit = is_hex( byte )
        if ( digit < 0 ) then break end
        v = 16*v + digit
        i = i + 1
    end
    return v, i
end
--        U t i l i t y   f u n c t i o n s   e n d   h e r e .

local p = {}

--        T h e   a n n o t a t i o n s   p a r s e r   s t a r t s   h e r e .
-- Link targets keyed by code point; p.process_arg3 fills this table.
p.annot_map = { }

-- Build the content of one character cell.
--   c : numeric code point.
-- Returns the character as rendered by mkchar(), wrapped in a piped wiki
-- link when p.annot_map holds a target for that code point.
function mk_item ( c )
    local glyph = mkchar(c)
    local target = p.annot_map[c]
    if ( target ) then
        return '[[' .. target .. '|' .. glyph .. ']]'
    end
    return glyph
end

-- Parse the annotation list and fill p.annot_map.
--   annots : whitespace-separated entries of the form  <characters>#<target>.
-- Every code point of <characters> is mapped to "#<target>" (the "#" is kept
-- in the stored value), which mk_item() later uses as a link target.  Because
-- the character part is matched greedily, an entry containing several "#"
-- is split at the last one.  Entries without "#" are skipped.
function p.process_arg3 ( annots )
--    mw.log(" annots = "..annots)
    -- The pattern needs a whitespace character after each entry.  Pad the
    -- input with one blank so that the final entry is not dropped when the
    -- argument does not end in whitespace.
    for t, a in mw.ustring.gmatch( annots .. ' ', "(%S+)(#.-)%s" ) do
--        mw.log(t.." → "..a)
        for cpt in mw.ustring.gcodepoint( t ) do
            p.annot_map[cpt] = a
        end
    end
end
--        T h e   a n n o t a t i o n s   p a r s e r   e n d s   h e r e .

--        T h e   c h a r a c t e r   l i s t   p a r s e r   s t a r t s   h e r e .
-- Attribute string shared by every slot of bgg that has no dedicated entry.
local bubu = 'style="color:#9900FF" '
-- Cell attribute strings, indexed by (command byte % 32) in p.process_arg2,
-- which looks up bytes 65..122 only.  Letters therefore map A/a → 1 …
-- Z/z → 26; slots 27..31 are reached by bytes 91..95 and slot 0 by byte 96.
local bgg = {
-- slots 1..8 (A..H); slot 4 (D) has its own colour
    bubu, bubu, bubu, 'bgcolor=#999999 ', bubu, bubu, bubu, bubu,
    'bgcolor=#6600FF ', -- IPA (slot 9, I)
    'style="background-color:#000000; color:#FFFF66" ', --combining diacritics (slot 10, J)
-- Latin letters (K, L, M)
    'bgcolor=#3333FF ', -- ASCII
    'bgcolor=#3377FF ', -- lesser common
    'bgcolor=#0099FF ', -- exotic
-- Numbers (N)
    'bgcolor=#FF9999 ', 
-- Control characters (O)
    'bgcolor=#FFAA66 ', 
-- Punctuation (P, Q)
    'bgcolor=#33FF33 ', -- common (English)
    'bgcolor=#22AA22 ', -- lesser common
    bubu, -- slot 18 (R)
-- Symbols (S, T, U)
    'bgcolor=#FFFF66 ', -- common
    'bgcolor=#CCFF66 ', -- box drawing / pseudographics
    'bgcolor=#AAAA44 ', -- uncommon
-- slots 22..31; slot 24 (X) adds no attributes at all
    bubu, bubu, '', bubu, bubu,  bubu,  bubu,  bubu,  bubu,  bubu, [0] = bubu
}

-- Parse the character list and emit the body rows of the table.
--   charlist : the table description, processed byte by byte:
--     "+" <hex digits>  set base_codepoint to the given value;
--     "!" <text> LF     use <text> as the new Block label and start a row
--                       with a label cell;
--     LF                end the current row;
--     bytes 65..122     emit one character cell for base_codepoint, styled
--                       by bgg[byte % 32], then advance base_codepoint;
--     "-"               emit a placeholder cell and advance base_codepoint;
--     any other byte    ignored.
-- Returns 0 without doing anything when charlist is at most one byte long,
-- otherwise 1.
function p.process_arg2 ( charlist )
    local c_length = string.len ( charlist )
    if ( c_length <= 1 ) then return 0 end
    local c_index = 1
    while ( c_index <= c_length ) do
        local c_code = string.byte( charlist, c_index )
        if ( c_code == 43 ) then -- “+”
            -- New base code point; get_hex also returns where parsing stopped.
            base_codepoint, c_index = get_hex (charlist, c_index+1 )
            if (
                ( outptr == row_start + 1 )
            and string.match( outbuff[outptr], '^ | style=')
            ) then
                -- The row holds nothing but a label cell: widen that cell to
                -- all 33 columns and close the row.
                outbuff[outptr] = ' | colspan=33 ' .. string.sub( outbuff[outptr], 3)
                puts(" |-")
                row_start = outptr
            else
                close_row( 33, "Unfinished row")
            end
        elseif ( c_code == 33 ) then -- “!”
            close_row( 33, "Unexpected “!” command")
            -- The label runs up to the next line feed; without one, parsing
            -- stops here and the label is not used.
            local eol = string.find( charlist, "\n", c_index+1, true )
            if (eol == nil) then break end
            Block = string.sub( charlist, c_index+1, eol-1 )
            puts(
                ' | style="font-size:80%" |U+' ..
                string.format('%04x:',base_codepoint) .. Block
            )
            -- Offset of base_codepoint within a 32-column row.
            local o = base_codepoint % 32 
            if ( o > 0 ) then
                -- One empty cell spanning the o leading columns.
                puts( ' | colspan='..o..' |' )
                -- Shift row_start back so that the column arithmetic in
                -- close_row counts the spanning cell as o columns.
                row_start = row_start - o + 1 -- temporary kludge
            end
            c_index = eol + 1
        elseif ( c_code == 10 ) then -- line feed
            if (
                ( outptr == row_start + 2 )  -- only one item in the row
            and ( string.byte( charlist, c_index - 1 ) == 45 ) -- it is “-”
            and string.match( outbuff[row_start+1], '^ | style=')
            ) then
                -- Label cell followed by a single placeholder: widen the label
                -- to a full-width coloured cell and overwrite the placeholder
                -- with the row separator.
                outbuff[row_start+1] = ' | colspan=33 bgcolor=#FF6699 ' .. string.sub( outbuff[row_start+1], 3)
                outbuff[outptr] = " |-"
                row_start = outptr
            else
                close_row( 33, "(skipped)") -- temporary
            end
            -- Round base_codepoint up to the next multiple of 16
            -- (2097152 = 0x200000 is itself a multiple of 16).
            base_codepoint = base_codepoint + ( (2097152 - base_codepoint) % 16 )
            c_index = c_index + 1
        else
            if ( outptr <= row_start ) then
                -- First cell of a fresh row: emit the row label first.
                puts(
                    ' | style="font-size:75%" |U+' ..
                    string.format('%04x:',base_codepoint) .. Block
                )
            end
            if ( (c_code >= 65 ) and (c_code <= 122) ) then
                local dimin = ''
                -- Bytes from 96 upwards (this covers the lowercase letters)
                -- request a smaller font.
                if (c_code >= 96 ) then dimin = 'style="font-size:75%" ' end
                local item = mk_item(base_codepoint)
                -- Slot 10 (J/j, combining diacritics in bgg): prefix a dotted circle.
                if ( c_code%32 == 10 ) then item = '◌'..item end
                puts(' | '..bgg[c_code%32]..dimin..'|\t'..item)
                base_codepoint = base_codepoint + 1 --temporary
            elseif ( c_code == 45 ) then -- “-”
                puts(' | bgcolor=#AA4466 |&nbsp;')
                base_codepoint = base_codepoint + 1 --temporary
            end -- ignore all other bytes
            c_index = c_index + 1
        end
    end
    close_row( 33, "end of data")
    return 1
end
--        T h e   c h a r a c t e r   l i s t   p a r s e r   e n d s   h e r e .


--        T h e   m a i n   r o u t i n e   s t a r t s   h e r e .
-- Entry point: build the whole wikitable and return it as one string.
--   frame : the invocation frame; frame.args[2] is the character list
--           (a built-in list is used when it is absent) and frame.args[3]
--           the optional annotation list.
function p.table( frame )
-- frame.args[1] is ignored now, but planned to affect the table format
    -- Header row: a corner cell followed by the 32 column numbers; from 10
    -- upwards each number is shown in decimal and in hexadecimal.
    puts(" |Block(s)")
    for k = 0, 31 do
        if ( k < 10 ) then
            puts(" ! "..k)
        else
            puts(' ! style="font-size:75%; line-height:1.25" |'..string.format("%d<br/>%02x", k, k))
        end
    end
    close_row( 33, "???")

    -- Annotations have to be known before the cells are generated.
    local annots = frame.args[3]
    if ( annots ) then
        p.process_arg3 ( annots )
    end

    local charlist = frame.args[2]
    if ( not charlist ) then
        charlist = [=[
PPPSSSSPPPSSPPPPNNNNNNNNNNPPSSSP
SKKKKKKKKKKKKKKKKKKKKKKKKKKPPPSS
DKKKKKKKKKKKKKKKKKKKKKKKKKKPPPS-
+00A0! [[Latin-1 Supplement (Unicode block)|Latin-1 Supplement]]
PQSSSSUPDSDQSOSDSSDDDSPPDDDQdddQ
LLLLLLlLLLLLLLLLLLLLLLLSLLLLLLLL
LLLLLLlLLLLLLLLLILLLLLLULLLLLLLL
]=]
    end
    p.process_arg2 ( charlist )

    -- The string stored last is replaced by the table terminator.
    outbuff[outptr] = " |}"
    return table.concat( outbuff, "\n" )
end
--        T h e   m a i n   r o u t i n e   e n d s   h e r e .


-- Retired entry point; kept so that existing invocations receive a notice.
--   frame : the invocation frame (not used).
-- Returns a fixed message.
function p.sheet( frame )
	local notice = '\nThe <code>sheet</code> call is discontinued.\t'
	return notice
end

return p