Module:Language: Difference between revisions

From Omniversalis
Content added Content deleted
m (1 revision imported)
en>Centrist16
(Created page with "require('Module:No globals') local m_data = mw.loadData("Module:Language/data") local langData = m_data.languages or m_data local p = {} local function checkForString(variab...")
Line 1: Line 1:
require('Module:No globals')
require('Module:No globals')
local m_data = mw.loadData("Module:Language/data")
local m_data = mw.loadData("Module:Language/data")
local langData = m_data.languages or m_data


local p = {}
local p = {}

local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match
local lower = mw.ustring.lower
local upper = mw.ustring.upper


local function checkForString(variable)
local function checkForString(variable)
Line 16: Line 10:


local function makeLinkedName(languageCode)
local function makeLinkedName(languageCode)
local data = m_data[languageCode]
local data = langData[languageCode]
local article = data["article"]
local article = data["article"]
local name = data["Wikipedia_name"] or data["name"]
local name = data["Wikipedia_name"] or data["name"]
Line 23: Line 17:


local function makeEntryName(word, languageCode)
local function makeEntryName(word, languageCode)
local data = m_data[languageCode]
local data = langData[languageCode]
local ugsub = mw.ustring.gsub
word = tostring(word)
word = tostring(word)
if word == nil then
if word == nil then
Line 31: Line 26:
else
else
-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping.
-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping.
word = gsub(word, "\'\'\'", "")
word = word:gsub("\'\'\'", "")
word = gsub(word, "\'\'", "")
word = word:gsub("\'\'", "")
if data == nil then
if data == nil then
return word
return word
Line 40: Line 35:
return word
return word
else
else
-- Decompose so that the diacritics of characters such
for regex, replacement in pairs(replacements) do
-- as á can be removed in one go.
word = gsub(word, regex, replacement)
-- No need to compose at the end, because the MediaWiki software
-- will handle that.
if replacements.decompose then
word = mw.ustring.toNFD(word)
for i, from in ipairs(replacements.from) do
word = ugsub(
word,
from,
replacements.to and replacements.to[i] or "")
end
else
for regex, replacement in pairs(replacements) do
word = ugsub(word, regex, replacement)
end
end
end
return word
return word
Line 47: Line 56:
end
end
end
end
end

p.makeEntryName = makeEntryName

local function fixScriptCode(firstLetter, threeLetters)
return string.upper(firstLetter) .. string.lower(threeLetters)
end
end


Line 54: Line 69:
if codes == nil or codes == "" then
if codes == nil or codes == "" then
errorText = 'no language or script code provided'
errorText = 'no language or script code provided'
elseif find(codes, "^%s*%a%a%a?%s*$") or find(codes, "^%s*%a%a%a?%-%a%a%a%a%s*$") then
elseif codes:find("^%a%a%a?$") or codes:find("^%a%a%a?%-%a%a%a%a$") then
-- A three- or two-letter lowercase sequence at beginning of first parameter
-- A three- or two-letter lowercase sequence at beginning of first parameter
languageCode =
languageCode =
find(codes, "^%s*%a%a%a?") and (
codes:find("^%a%a%a?") and (
match(codes, "^%s*(%l%l%l?)")
codes:match("^(%l%l%l?)")
or gsub(
or codes:match("^(%a%a%a?)")
match(codes, "^%s*(%a%a%a?)"),
:gsub("(%a%a%a?)", string.lower, 1)
"(%a%a%a?)",
function(a)
return lower(a)
end,
1
)
)
)
-- One uppercase and three lowercase letters at the end of the first parameter
-- One uppercase and three lowercase letters at the end of the first parameter
scriptCode =
scriptCode =
find(codes, "%a%a%a%a%s*$") and (
codes:find("%a%a%a%a$") and (
match(codes, "(%u%l%l%l)%s*$")
codes:match("(%u%l%l%l)$")
or gsub(
or gsub(
match(codes, "(%a%a%a%a)%s*$"),
codes:match("(%a%a%a%a)$"),
"(%a)(%a%a%a)",
"(%a)(%a%a%a)",
function(a, b)
fixScriptCode,
return upper(a) .. lower(b)
end,
1
1
)
)
)
)
elseif find(codes, "^%s*%a%a%a%-%a%a%a$") then
elseif codes:find("^%a%a%a?%-%a%a%a?$")
or codes:find("^%a%a%a%-%a%a%a%-%a%a%a$") then
languageCode = match(codes, "^%s*%l%l%l%-%l%l%l$") and match (codes, "^%s*%l%l%l%-%l%l%l$") or gsub(match(codes, "^%s*%a%a%a%-%a%a%a$"), "(%a%a%a?)", function(a) return lower(a) end, 1)
languageCode = codes
elseif find(codes, "^%s*%a%a%a?") then
languageCode, invalidCode = match(codes, "^%s*(%a%a%a?)%-?(.*)")
-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
languageCode = lower(languageCode)
-- letters separated by hyphens. This only allows for one sequence, as it is
-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
elseif codes:find("^%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
languageCode, scriptCode =
codes:match("^(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
if not languageCode then
errorText = '<code>'..codes..'</code> is not a valid language or script code.'
elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
errorText = '<code>'..scriptCode..'</code> is not a valid script code.'
else
scriptCode = scriptCode:gsub(
"(%a)(%a%a%a)",
fixScriptCode,
1
)
end
elseif codes:find("^%a%a%a?") then
languageCode, invalidCode = codes:match("^(%a%a%a?)%-?(.*)")
languageCode = string.lower(languageCode)
errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
elseif find(codes, "%-?%a%a%a%a%s*$") then
elseif codes:find("%-?%a%a%a%a$") then
invalidCode, scriptCode = match(codes, "(.*)%-?(%a%a%a%a)%s*$")
invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)$")
scriptCode = gsub(
scriptCode = gsub(
scriptCode,
scriptCode,
"(%a)(%a%a%a)",
"(%a)(%a%a%a)",
fixScriptCode
function(a, b)
return upper(a) .. lower(b)
end
)
)
errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
Line 100: Line 124:
errorText = '<code>'..codes..'</code> is not a valid language or script code.'
errorText = '<code>'..codes..'</code> is not a valid language or script code.'
end
end
if not scriptCode then
if not scriptCode or scriptCode == "" then
scriptCode = require("Module:Language/scripts").isLatn(text) and "Latn" or "unknown"
scriptCode = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown"
end
end
if errorText then
if errorText then
Line 108: Line 132:
errorText = ""
errorText = ""
end
end
languageCode = m_data.redirects[languageCode] or languageCode
return languageCode, scriptCode, errorText
return languageCode, scriptCode, errorText
end
end


local function tag(text, languageCode, script, italics)
local function tag(text, languageCode, script, italics)
local data = m_data[languageCode]
local data = langData[languageCode]
-- Use Wikipedia code if it has been given: for instance,
-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
-- code "ine-x-proto".
languageCode = data and data.Wikipedia_code or languageCode
local italicize = script == "Latn" and italics
local italicize = script == "Latn" and italics
Line 133: Line 162:
return table.concat(out)
return table.concat(out)
end
end




function p.lang(frame)
function p.lang(frame)
Line 138: Line 169:
local args = parent.args[1] and parent.args or frame.args
local args = parent.args[1] and parent.args or frame.args
local codes = args[1]
local codes = args[1] and mw.text.trim(args[1])
local text = args[2] or error("Provide text in the second parameter")
local text = args[2] or error("Provide text in the second parameter")
local languageCode, scriptCode, errorText = getCodes(codes, text)
local languageCode, scriptCode, errorText = getCodes(codes, text)
local italics = args.italics or args.i
local italics = args.italics or args.i or args.italic
italics = not (italics == "n" or italics == "-")
italics = not (italics == "n" or italics == "-" or italics == "no")
return tag(text, languageCode, scriptCode, italics) .. errorText
return tag(text, languageCode, scriptCode, italics) .. errorText
Line 150: Line 181:


local function linkToWiktionary(entry, linkText, languageCode)
local function linkToWiktionary(entry, linkText, languageCode)
local data = m_data[languageCode]
local data = langData[languageCode]
local name
local name
if languageCode then
if languageCode then
if data then
if data and data.name then
name = data.name
name = data.name
elseif mw.language.fetchLanguageName(languageCode, 'en') ~= "" then
elseif mw.language.fetchLanguageName(languageCode, 'en') ~= "" then
Line 159: Line 190:
name = mw.language.fetchLanguageName(languageCode, 'en')
name = mw.language.fetchLanguageName(languageCode, 'en')
else
else
error("No name for the language " .. (languageCode or "nil") .. " could be found")
error("No name for the language " .. ("%q"):format(languageCode or nil) .. " could be found")
end
end
if sub(entry, 1, 1) == "*" then
if entry:sub(1, 1) == "*" then
if name ~= "" then
if name ~= "" then
entry = "Reconstruction:" .. name .. "/" .. sub(entry, 2)
entry = "Reconstruction:" .. name .. "/" .. entry:sub(2)
else
error("Language name is empty")
end
elseif data and data.type == "reconstructed" then
mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
local frame = mw.getCurrentFrame()
-- Track reconstructed entries with no asterisk by transcluding
-- a nonexistent template. This technique is used in Wiktionary:
-- see [[wikt:Module:debug]].
-- [[Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk]]
pcall(frame.expandTemplate, frame,
{ title = 'tracking/wikt-lang/reconstructed with no asterisk' })
if name ~= "" then
entry = "Reconstruction:" .. name .. "/" .. entry
else
error("Language name is empty")
end
elseif data and data.type == "appendix" then
if name ~= "" then
entry = "Appendix:" .. name .. "/" .. entry
else
else
error("Language name is empty")
error("Language name is empty")
Line 182: Line 233:
local args = parent.args[1] and parent.args or frame.args
local args = parent.args[1] and parent.args or frame.args
local codes = args[1] or nil
local codes = args[1] and mw.text.trim(args[1])
local word1 = args[2] or nil
local word1 = args[2]
local word2 = args[3] or nil
local word2 = args[3]
if not args[2] then
error("Parameter 2 is required")
end
local languageCode, scriptCode, errorText = getCodes(codes, word1)
local languageCode, scriptCode, errorText = getCodes(codes, word1)
Line 220: Line 275:
local args = parent.args[1] and parent.args or frame.args
local args = parent.args[1] and parent.args or frame.args
local codes = args[1] or nil
local codes = args[1] and mw.text.trim(args[1])
local word1 = args[2] or nil
local word1 = args[2]
local word2 = args[3] or nil
local word2 = args[3]
if not word1 then
error("Provide a word in parameter 2.")
end
local languageCode, scriptCode, errorText = getCodes(codes, word1)
local languageCode, scriptCode, errorText = getCodes(codes, word1)

Revision as of 18:46, 3 July 2019

Documentation for this module may be created at Module:Language/doc

require('Module:No globals')
local m_data = mw.loadData("Module:Language/data")
local langData = m_data.languages or m_data

local p = {}

-- Reports whether a value is usable as text: true for anything other than
-- nil and the empty string.
local function checkForString(variable)
	local isMissing = variable == nil or variable == ""
	return not isMissing
end

-- Builds a wikilink to the language's article, labelled with its
-- Wikipedia name (falling back to its plain name), followed by a colon and
-- a non-breaking space.
local function makeLinkedName(languageCode)
	local info = langData[languageCode]
	local target = info.article
	local label = info.Wikipedia_name or info.name
	return "[[" .. target .. "|" .. label .. "]]:&nbsp;"
end

-- Converts a displayed word into the title of its Wiktionary entry.
--
-- Parameters:
--   word          the displayed form; converted with tostring()
--   languageCode  key into langData used to look up entry-name replacements
-- Returns:
--   the word with wiki bold/italic markup removed and, if the language's data
--   has a "replacements" table, with those replacements applied.
-- Raises:
--   an error if word is nil.
local function makeEntryName(word, languageCode)
	-- The nil check has to happen before tostring(): tostring(nil) is the
	-- string "nil", which would otherwise be treated as a real word.
	if word == nil then
		error("The function makeEntryName requires a string argument")
	end

	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or emphasis
	-- can be linked without piping. Bold (three apostrophes) must be removed
	-- before italics (two apostrophes).
	word = word:gsub("'''", "")
	word = word:gsub("''", "")

	local data = langData[languageCode]
	local replacements = data and data["replacements"]
	if replacements == nil then
		return word
	end

	local ugsub = mw.ustring.gsub
	if replacements.decompose then
		-- Decompose so that the diacritics of characters such as á can be
		-- removed in one go. No need to compose at the end, because the
		-- MediaWiki software will handle that.
		word = mw.ustring.toNFD(word)
		for i, from in ipairs(replacements.from) do
			-- A missing "to" list (or entry) means "delete the match".
			word = ugsub(
				word,
				from,
				replacements.to and replacements.to[i] or "")
		end
	else
		-- Flat form: each key is a pattern and each value its replacement.
		for regex, replacement in pairs(replacements) do
			word = ugsub(word, regex, replacement)
		end
	end
	return word
end

-- Expose makeEntryName as a field of the module table p.
p.makeEntryName = makeEntryName

-- Normalizes the capitalization of a four-letter script code supplied as its
-- first letter and its remaining three letters: first letter uppercase,
-- the rest lowercase (e.g. "l", "ATN" -> "Latn").
local function fixScriptCode(firstLetter, threeLetters)
	local head = string.upper(firstLetter)
	local tail = string.lower(threeLetters)
	return head .. tail
end

-- Parses the first template parameter into a language code and script code.
--
-- Parameters:
--   codes  the (already trimmed) code string, e.g. "en", "sr-Cyrl",
--          "ine-x-proto"; may be nil or ""
--   text   the text being tagged; only used to guess the script when no
--          script code was given
-- Returns three values:
--   languageCode  the language code after redirect resolution, or nil when
--                 none could be extracted
--   scriptCode    the script code; "Latn" or "unknown" when none was given
--   errorText     "" on success, otherwise a small bracketed HTML message
local function getCodes(codes, text)
	local languageCode, scriptCode, invalidCode
	local errorText
	if codes == nil or codes == "" then
		errorText = 'no language or script code provided'
	elseif codes:find("^%a%a%a?$") or codes:find("^%a%a%a?%-%a%a%a%a$") then
		-- A two- or three-letter language code, optionally followed by a
		-- hyphen and a four-letter script code. Lowercase the whole language
		-- code so that mixed-case input such as "enG" is not truncated.
		languageCode = string.lower(codes:match("^%a%a%a?"))
		-- Script codes are normalized to one uppercase letter followed by
		-- three lowercase letters.
		local rawScript = codes:match("%-(%a%a%a%a)$")
		if rawScript then
			scriptCode = fixScriptCode(rawScript:sub(1, 1), rawScript:sub(2))
		end
	elseif codes:find("^%a%a%a?%-%a%a%a?$")
	or codes:find("^%a%a%a%-%a%a%a%-%a%a%a$") then
		-- Codes made of two or three hyphen-separated short subtags are used
		-- verbatim.
		languageCode = codes

	-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
	-- letters separated by hyphens. This only allows for one sequence, as it is
	-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
	elseif codes:find("^%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
		-- NOTE(review): the test above admits a two-letter prefix but the
		-- match below requires three letters, so "xx-x-foo" is reported as
		-- invalid. Behavior kept as-is; confirm whether that is intended.
		languageCode, scriptCode =
			codes:match("^(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
		if not languageCode then
			errorText = '<code>'..codes..'</code> is not a valid language or script code.'
		elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
			errorText = '<code>'..scriptCode..'</code> is not a valid script code.'
		else
			-- Parentheses keep only the string result of gsub.
			scriptCode = (scriptCode:gsub(
				"(%a)(%a%a%a)",
				fixScriptCode,
				1
			))
		end
	elseif codes:find("^%a%a%a?") then
		-- A plausible language code followed by something that is not a
		-- script code.
		languageCode, invalidCode = codes:match("^(%a%a%a?)%-?(.*)")
		languageCode = string.lower(languageCode)
		errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
	elseif codes:find("%-?%a%a%a%a$") then
		-- A plausible script code preceded by something that is not a
		-- language code.
		invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)$")
		-- A bare `gsub` is not defined anywhere in this module, so calling it
		-- would raise an error; normalize the four letters directly instead.
		scriptCode = fixScriptCode(scriptCode:sub(1, 1), scriptCode:sub(2))
		errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
	else
		errorText = '<code>'..codes..'</code> is not a valid language or script code.'
	end
	if not scriptCode or scriptCode == "" then
		-- No script code given: guess from the text itself.
		scriptCode = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown"
	end
	if errorText then
		errorText = ' <span style="font-size: smaller">[' .. errorText .. ']</span>'
	else
		errorText = ""
	end
	-- The data module may lack a "redirects" table (the module already
	-- tolerates a missing "languages" table), and languageCode is nil on the
	-- error paths, so only consult the redirects when both exist.
	local redirects = m_data.redirects
	if redirects and languageCode then
		languageCode = redirects[languageCode] or languageCode
	end
	return languageCode, scriptCode, errorText
end

-- Wraps text in an HTML element carrying language metadata.
--
-- Parameters:
--   text          the text to wrap; "[text?]" is substituted when nil
--   languageCode  language code for the lang / xml:lang attributes; when nil
--                 the attributes are omitted
--   script        script code; italics are only applied for "Latn"
--   italics       whether the caller wants italics
-- Returns:
--   an <i> or <span> element as a string, wrapped in &rlm; / &lrm; marks when
--   the language's data says its direction is "rtl".
local function tag(text, languageCode, script, italics)
	local data = languageCode and langData[languageCode]
	-- Use Wikipedia code if it has been given: for instance,
	-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
	-- code "ine-x-proto".
	languageCode = data and data.Wikipedia_code or languageCode

	local italicize = script == "Latn" and italics

	if not text then text = "[text?]" end

	-- { attribute, marker before the element, marker after the element }
	local textDirectionMarkers = { "", "", "" }
	if data and data["direction"] == "rtl" then
		textDirectionMarkers = { ' dir="rtl"', '&rlm;', '&lrm;' }
	end

	-- When no language code could be determined, emit the element without
	-- language attributes instead of failing on a nil concatenation, so that
	-- the caller can still append its error message.
	local langAttributes = ""
	if languageCode then
		langAttributes = " lang=\"" .. languageCode .. "\" xml:lang=\"" .. languageCode .. "\""
	end

	local element = italicize and "i" or "span"
	local out = {
		textDirectionMarkers[2],
		"<" .. element .. langAttributes .. textDirectionMarkers[1] .. ">"
			.. text .. "</" .. element .. ">",
		textDirectionMarkers[3],
	}

	return table.concat(out)
end



-- Entry point: tags text (parameter 2) with the language given in
-- parameter 1. Arguments are taken from the parent frame when it has a first
-- positional parameter, otherwise from the invoking frame. Italics are on
-- unless the italics / i / italic parameter is "n", "-" or "no".
function p.lang(frame)
	local parentArgs = frame:getParent().args
	local args = frame.args
	if parentArgs[1] then
		args = parentArgs
	end

	local codes = args[1]
	if codes then
		codes = mw.text.trim(codes)
	end

	local text = args[2]
	if not text then
		error("Provide text in the second parameter")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, text)

	local italicsArg = args.italics or args.i or args.italic
	local italics = italicsArg ~= "n" and italicsArg ~= "-" and italicsArg ~= "no"

	local tagged = tag(text, languageCode, scriptCode, italics)
	return tagged .. errorText
end

-- Builds a link to a Wiktionary entry.
--
-- Parameters:
--   entry         Wiktionary page title; a leading "*" marks a reconstructed
--                 form
--   linkText      text displayed for the link
--   languageCode  optional; when given, the link targets the language's
--                 section and reconstructed/appendix languages are routed to
--                 the Reconstruction: / Appendix: namespaces
-- Returns:
--   the wikilink as a string.
-- Raises:
--   an error if entry or linkText is missing, if no name can be found for
--   the language, or if a namespace prefix is needed but the name is empty.
local function linkToWiktionary(entry, linkText, languageCode)
	-- Validate before anything touches entry, so a missing argument produces
	-- this message rather than an "attempt to index a nil value" error.
	if not (entry and linkText) then
		error("linkToWiktionary needs a Wiktionary entry or link text, or both")
	end

	if not languageCode then
		return "[[wikt:" .. entry .. "|" .. linkText .. "]]"
	end

	local data = langData[languageCode]
	local name = data and data.name
	if not name then
		-- On other languages' wikis, use mw.getContentLanguage():getCode(), or replace with that wiki's language code.
		name = mw.language.fetchLanguageName(languageCode, 'en')
		if name == "" then
			error("No name for the language " .. ("%q"):format(languageCode) .. " could be found")
		end
	end

	-- Decide which namespace, if any, the entry lives in.
	local namespace
	if entry:sub(1, 1) == "*" then
		namespace = "Reconstruction"
		entry = entry:sub(2)
	elseif data and data.type == "reconstructed" then
		mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
		local frame = mw.getCurrentFrame()
		-- Track reconstructed entries with no asterisk by transcluding
		-- a nonexistent template. This technique is used in Wiktionary:
		-- see [[wikt:Module:debug]].
		-- [[Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk]]
		pcall(frame.expandTemplate, frame,
			{ title = 'tracking/wikt-lang/reconstructed with no asterisk' })
		namespace = "Reconstruction"
	elseif data and data.type == "appendix" then
		namespace = "Appendix"
	end

	if namespace then
		if name == "" then
			error("Language name is empty")
		end
		entry = namespace .. ":" .. name .. "/" .. entry
	end

	return "[[wikt:" .. entry .. "#" .. name .. "|" .. linkText .. "]]"
end

-- Entry point: links a word to its Wiktionary entry and tags the link with
-- language metadata.
--
-- Arguments (from the parent frame when it has a first positional parameter,
-- otherwise from the invoking frame):
--   1  language code, optionally with a script code
--   2  the word to link to (required)
--   3  optional display text for the link
--   italics / i / italic  set to "n", "-" or "no" to disable italics
-- Returns:
--   the tagged link followed by any error text from getCodes.
-- Raises:
--   an error if parameter 2 is missing.
function p.wiktlang(frame)
	local parent = frame:getParent()
	local args = parent.args[1] and parent.args or frame.args

	local codes = args[1] and mw.text.trim(args[1])
	local word1 = args[2]
	local word2 = args[3]

	if not word1 then
		error("Parameter 2 is required")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, word1)

	-- Accept the same parameter aliases and "off" values as p.lang, so that
	-- |italic=no behaves identically in both entry points.
	local italics = args.italics or args.i or args.italic
	italics = not (italics == "n" or italics == "-" or italics == "no")

	-- The entry name always comes from parameter 2; parameter 3, when
	-- non-empty, only replaces the displayed text.
	local entry, linkText
	if checkForString(word1) then
		entry = makeEntryName(word1, languageCode)
		linkText = checkForString(word2) and word2 or word1
	end

	local out
	if languageCode and entry and linkText then
		out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italics)
	elseif entry and linkText then
		-- No usable language code: plain link without language tagging.
		out = linkToWiktionary(entry, linkText)
	else
		out = '<span style="font-size: smaller;">[text?]</span>'
	end

	if out and errorText then
		return out .. errorText
	end
	return errorText or error("The function wiktlang generated nothing")
end

-- Entry point: links a word (parameter 2) to its Wiktionary entry for the
-- language in parameter 1, optionally displaying parameter 3 instead.
-- Unlike p.wiktlang, the link is not wrapped in a language-tagged element.
-- Raises an error if parameter 2 is missing.
function p.wikt(frame)
	local parentArgs = frame:getParent().args
	local args = frame.args
	if parentArgs[1] then
		args = parentArgs
	end

	local codes = args[1]
	if codes then
		codes = mw.text.trim(codes)
	end
	local word1, word2 = args[2], args[3]

	if not word1 then
		error("Provide a word in parameter 2.")
	end

	-- The script code returned by getCodes is not needed for a plain link.
	local languageCode, _, errorText = getCodes(codes, word1)

	-- Parameter 2 supplies the entry; parameter 3, if non-empty, supplies the
	-- displayed text.
	local entry, linkText
	if checkForString(word1) then
		entry = makeEntryName(word1, languageCode)
		if checkForString(word2) then
			linkText = word2
		else
			linkText = word1
		end
	end

	local out
	if entry and linkText then
		-- languageCode may be nil here, which is the same as omitting it.
		out = linkToWiktionary(entry, linkText, languageCode)
	else
		out = '<span style="font-size: smaller;">[text?]</span>'
	end

	if not (out and errorText) then
		return errorText or error("The function wikt generated nothing")
	end
	return out .. errorText
end

-- Return the module table holding makeEntryName, lang, wiktlang and wikt.
return p