-- Module:UCS
-- Documentation for this module may be created at Module:UCS/doc
-- ┌─────────────────────────────────────────┐
-- │ Makes the table of UCS (Unicode) characters for a reference page │
-- └─────────────────────────────────────────┘
-- Global variables.
-- Output accumulator: a sequence of wikitext lines, seeded with the table opener.
local outbuff = { '{| class="wikitable"' }
-- outptr:    index of the last used slot in outbuff.
-- row_start: usually the index of the most recent " |-" line in outbuff.
local outptr, row_start = 1, 1
-- Code point that the next character cell will receive.
local base_codepoint = 32
-- Label of the current block, appended to the row header cells.
local Block = " [[Basic Latin (Unicode block)|Basic Latin]]"
-- Utility functions start here.
--- Append one line of wikitext to the output buffer.
-- Advances the file-level `outptr` by one and stores `s` in that slot of
-- `outbuff`.
-- @param s the string to append
function puts( s )
  -- mw.log("Output: "..s)
  local slot = outptr + 1
  outbuff[slot] = s
  outptr = slot
end
--- Terminate the current table row, if anything was emitted for it.
-- When the row holds fewer than `NoC` cells, a red filler cell carrying the
-- message `s` is appended first (with a colspan when more than one column is
-- missing). Then the row separator " |-" is emitted and `row_start` is moved
-- onto it. Nothing happens when no cell follows the last separator.
-- @param NoC number of cells a complete row is expected to have
-- @param s   text shown in the filler cell of an incomplete row
function close_row( NoC, s )
  -- mw.log("close_row("..NoC..", "..s..")")
  if outptr <= row_start then
    return -- empty row: nothing to close
  end
  local missing = row_start + NoC - outptr
  if missing > 0 then -- may not happen with correct input data
    local span = ( missing > 1 ) and ( 'colspan='..missing..' ' ) or ''
    puts( ' | '..span..' style="color:red" |'..s )
  end
  puts(" |-")
  row_start = outptr
end
--- Turn a code point into the text placed in a table cell.
-- Code points below 36, "&", "[ \ ]", "{ | }" and DEL are returned as a
-- decimal numeric character reference ("&#NN;"); every other code point is
-- returned as the character itself via mw.ustring.char.
-- @param c code point (number)
-- @return string for the cell
function mkchar( c )
  local as_reference =
    c < 36                            -- C0, space, !, ", #
    or c == 38                        -- &
    or ( c >= 91 and c <= 93 )        -- [ \ ]
    or ( c >= 123 and c <= 125 )      -- { | }
    or c == 127                       -- DEL, and ( c < 160 ) (C1) pointless
  if not as_reference then
    return mw.ustring.char( c )
  end
  return '&#'..c..';'
end
-- Value of the hexadecimal digit most recently recognised by is_hex();
-- get_hex() reads it after each successful is_hex() call.
local hh = 0

--- Classify a byte as an ASCII hexadecimal digit.
-- @param c byte value as returned by string.byte, or nil when the index
--          lies past the end of the string
-- @return the digit's value (0–15), which is also stored in the file-level
--         `hh`; -1 when `c` is nil or not a hex digit (`hh` is then left
--         unchanged)
function is_hex ( c )
  -- string.byte yields no value past the end of the string; report that as
  -- "not a digit" instead of raising on a comparison with nil.
  if c == nil then return -1 end
  if c >= 48 and c <= 57 then -- 0–9
    hh = c - 48
  elseif c >= 65 and c <= 70 then -- A–F
    hh = c - 55
  elseif c >= 97 and c <= 102 then -- a–f
    hh = c - 87
  else
    return -1
  end
  return hh
end
--- Read a run of hexadecimal digits from a string.
-- Bytes are classified with is_hex(); scanning stops at the first byte it
-- rejects.
-- @param s string to scan
-- @param i index of the first byte to examine
-- @return the number denoted by the digits (0 when there are none), and the
--         index of the first byte after them
function get_hex ( s, i )
  local value, pos = 0, i
  while true do
    -- is_hex returns the digit value (the same number it leaves in `hh`)
    local digit = is_hex( string.byte( s, pos ) )
    if digit < 0 then
      return value, pos
    end
    value = value * 16 + digit
    pos = pos + 1
  end
end
-- Utility functions end here.
-- The table returned by this module. annot_map maps a code point to the
-- link target that mk_item() wraps around that character.
local p = { annot_map = {} }
-- The annotations parser starts here.
--- Produce the content of one character cell.
-- The character text comes from mkchar(); when p.annot_map has an entry for
-- the code point, the text is wrapped in a wikilink to that entry.
-- @param c code point (number)
-- @return wikitext for the cell
function mk_item ( c )
  local glyph = mkchar(c)
  local target = p.annot_map[c]
  if not target then
    return glyph
  end
  return '[['..target..'|'..glyph..']]'
end
--- Parse the annotations text and record link targets in p.annot_map.
-- The text is a whitespace-separated list of items of the form
-- <characters>#<anchor>. Every code point of <characters> is mapped to the
-- string "#<anchor>" (the "#" is part of the stored value), which mk_item()
-- later uses as a link target. Existing entries for the same code point are
-- overwritten; nothing is returned.
-- @param annots annotation text (p.table passes frame.args[3])
function p.process_arg3 ( annots )
  -- mw.log(" annots = "..annots)
  -- The pattern demands a whitespace character after each item. Appending a
  -- space lets the final item match even when the argument does not end
  -- with whitespace; text that already ends with whitespace is unaffected.
  for chars, target in mw.ustring.gmatch( annots .. " ", "(%S+)(#.-)%s" ) do
    -- mw.log(chars.." → "..target)
    for cpt in mw.ustring.gcodepoint( chars ) do
      p.annot_map[cpt] = target
    end
  end
end
-- The annotations parser ends here.
-- The character list parser starts here.
-- Cell attributes per class letter of the character list. The parser indexes
-- this table with (byte % 32), so 1 = A/a … 26 = Z/z, while 27–31 and 0 are
-- reached by the bytes between "Z" and "a". Every slot without an explicit
-- entry below carries the default attribute string `bubu`.
local bubu = 'style="color:#9900FF" '
local bgg = {}
for slot = 0, 31 do
  bgg[slot] = bubu
end
bgg[4]  = 'bgcolor=#999999 '  -- D
bgg[9]  = 'bgcolor=#6600FF '  -- I: IPA
bgg[10] = 'style="background-color:#000000; color:#FFFF66" ' -- J: combining diacritics
-- Latin letters (K, L, M)
bgg[11] = 'bgcolor=#3333FF '  -- ASCII
bgg[12] = 'bgcolor=#3377FF '  -- lesser common
bgg[13] = 'bgcolor=#0099FF '  -- exotic
-- Numbers (N)
bgg[14] = 'bgcolor=#FF9999 '
-- Control characters (O)
bgg[15] = 'bgcolor=#FFAA66 '
-- Punctuation (P, Q)
bgg[16] = 'bgcolor=#33FF33 '  -- common (English)
bgg[17] = 'bgcolor=#22AA22 '  -- lesser common
-- Symbols (S, T, U)
bgg[19] = 'bgcolor=#FFFF66 '  -- common
bgg[20] = 'bgcolor=#CCFF66 '  -- box drawing / pseudographics
bgg[21] = 'bgcolor=#AAAA44 '  -- uncommon
bgg[24] = ''                  -- X: no attributes at all
--- Parse the character list and append the resulting table rows to the
-- output buffer. The list is scanned byte by byte:
--   "+" followed by hex digits sets the current code point;
--   "!" starts a block label that runs up to the next line feed;
--   a line feed ends the current table row;
--   bytes 65–122 emit one character cell each, styled via bgg[byte % 32];
--   "-" emits a placeholder cell and also consumes one code point;
--   every other byte is ignored.
-- Reads and updates the file-level base_codepoint, Block, outbuff, outptr
-- and row_start.
-- @param charlist the character list text
-- @return 0 when the list is at most one byte long (nothing is emitted),
--         otherwise 1
function p.process_arg2 ( charlist )
local c_length = string.len ( charlist )
if ( c_length <= 1 ) then return 0 end
local c_index = 1
while ( c_index <= c_length ) do
local c_code = string.byte( charlist, c_index )
if ( c_code == 43 ) then -- “+”
-- get_hex returns the new code point and the index just past the digits.
-- NOTE(review): if the digits run to the very end of charlist, get_hex
-- hands is_hex a missing byte — confirm is_hex tolerates that.
base_codepoint, c_index = get_hex (charlist, c_index+1 )
-- A pending row that consists of a single cell beginning with
-- " | style=" is stretched across all 33 columns and closed; any other
-- pending row is closed through close_row.
if (
( outptr == row_start + 1 )
and string.match( outbuff[outptr], '^ | style=')
) then
outbuff[outptr] = ' | colspan=33 ' .. string.sub( outbuff[outptr], 3)
puts(" |-")
row_start = outptr
else
close_row( 33, "Unfinished row")
end
elseif ( c_code == 33 ) then -- “!”
close_row( 33, "Unexpected “!” command")
-- Plain (non-pattern) search for the line feed that ends the label.
local eol = string.find( charlist, "\n", c_index+1, true )
-- No line feed after the label: stop parsing altogether.
if (eol == nil) then break end
Block = string.sub( charlist, c_index+1, eol-1 )
-- Row header cell: code point in hex followed by the new block label.
puts(
' | style="font-size:80%" |U+' ..
string.format('%04x:',base_codepoint) .. Block
)
-- Offset of the code point within a 32-column row; when non-zero, one
-- empty cell spanning that many columns is emitted.
local o = base_codepoint % 32
if ( o > 0 ) then
puts( ' | colspan='..o..' |' )
-- Lowering row_start makes the single colspan cell count as o cells in
-- close_row's deficit arithmetic.
row_start = row_start - o + 1 -- temporary kludge
end
c_index = eol + 1
elseif ( c_code == 10 ) then -- line feed
-- A row holding just the header cell plus one "-" placeholder is
-- collapsed: the header is stretched over all 33 columns with a pink
-- background and the placeholder slot is overwritten by the separator.
if (
( outptr == row_start + 2 ) -- only one item in the row
and ( string.byte( charlist, c_index - 1 ) == 45 ) -- it is “-”
and string.match( outbuff[row_start+1], '^ | style=')
) then
outbuff[row_start+1] = ' | colspan=33 bgcolor=#FF6699 ' .. string.sub( outbuff[row_start+1], 3)
outbuff[outptr] = " |-"
row_start = outptr
else
close_row( 33, "(skipped)") -- temporary
end
-- Round the code point up to the next multiple of 16 (2097152 is itself
-- a multiple of 16, so the added term is (-base_codepoint) mod 16).
base_codepoint = base_codepoint + ( (2097152 - base_codepoint) % 16 )
c_index = c_index + 1
else
-- First byte of a fresh row: emit the row header cell (code point and
-- current block label) before anything else.
if ( outptr <= row_start ) then
puts(
' | style="font-size:75%" |U+' ..
string.format('%04x:',base_codepoint) .. Block
)
end
if ( (c_code >= 65 ) and (c_code <= 122) ) then
local dimin = ''
-- Bytes from 96 upwards (the lower-case letters and "`") get a smaller font.
if (c_code >= 96 ) then dimin = 'style="font-size:75%" ' end
local item = mk_item(base_codepoint)
-- Class 10 (J/j, combining diacritics in bgg) is shown on a dotted circle.
if ( c_code%32 == 10 ) then item = '◌'..item end
puts(' | '..bgg[c_code%32]..dimin..'|\t'..item)
base_codepoint = base_codepoint + 1 --temporary
elseif ( c_code == 45 ) then -- “-”
puts(' | bgcolor=#AA4466 | ')
base_codepoint = base_codepoint + 1 --temporary
end -- ignore all other bytes
c_index = c_index + 1
end
end
close_row( 33, "end of data")
return 1
end
-- The character list parser ends here.
-- The main routine starts here.
--- Build the complete wikitable of characters.
-- Emits the header row (a "Block(s)" cell followed by the column numbers
-- 0–31), loads the optional annotations from frame.args[3], renders the
-- character list from frame.args[2] (or a built-in list when that argument
-- is absent) and closes the table.
-- @param frame frame object; args[2] = character list, args[3] = annotations
-- @return the wikitext of the table as one string
function p.table( frame )
  -- frame.args[1] is ignored now, but planned to affect the table format

  -- Start from a clean output state, so that calling this function again in
  -- the same Lua state does not append to the previous call's table.
  -- p.annot_map is deliberately left alone: it is an exported field that
  -- p.process_arg3 only ever adds to.
  outbuff = { '{| class="wikitable"' }
  outptr = 1
  row_start = 1
  base_codepoint = 32
  Block = " [[Basic Latin (Unicode block)|Basic Latin]]"

  -- Header row: 1 label cell + 32 column headers = 33 cells.
  puts(" |Block(s)")
  for k = 0, 9 do
    puts(" ! "..k)
  end
  for k = 10, 31 do
    -- two-digit columns show the decimal and the hexadecimal number
    puts(' ! style="font-size:75%; line-height:1.25" |'..string.format("%d<br/>%02x", k, k))
  end
  close_row( 33, "???")

  if frame.args[3] then
    p.process_arg3( frame.args[3] )
  end
  if frame.args[2] then
    p.process_arg2( frame.args[2] )
  else
    -- Built-in list. The lines of the long string must stay at column 0:
    -- a stray byte at the start of a line would open a new table row.
    p.process_arg2( [=[
PPPSSSSPPPSSPPPPNNNNNNNNNNPPSSSP
SKKKKKKKKKKKKKKKKKKKKKKKKKKPPPSS
DKKKKKKKKKKKKKKKKKKKKKKKKKKPPPS-
+00A0! [[Latin-1 Supplement (Unicode block)|Latin-1 Supplement]]
PQSSSSUPDSDQSOSDSSDDDSPPDDDQdddQ
LLLLLLlLLLLLLLLLLLLLLLLSLLLLLLLL
LLLLLLlLLLLLLLLLILLLLLLULLLLLLLL
]=] )
  end

  -- The last slot is overwritten with the table terminator.
  outbuff[outptr] = " |}"
  return table.concat( outbuff, "\n" )
end
-- The main routine ends here.
--- Stub for the discontinued `sheet` call.
-- Ignores its argument and returns a fixed notice.
-- @param frame unused
-- @return the notice text
function p.sheet( frame )
  local notice = '\nThe <code>sheet</code> call is discontinued.\t'
  return notice
end
-- Hand the module table (process_arg2, process_arg3, table, sheet and
-- annot_map) back to whoever loaded this module.
return p