Modul:zh-translit

Dokumentasjon for modulen kan opprettast på Modul:zh-translit/dok

-- Table of public functions; it is returned at the end of this module.
local export = {}

--- Build the failure result that export.tr returns when it cannot produce a
-- transliteration.
-- @param lang     language code; it is only looked up when `request` is truthy
-- @param request  when truthy, attach a "Requests for transliteration" category
-- @return three values: nil, true, and either nil or a one-element list
--         containing the category wikitext
local function fail(lang, request)
	local cat
	if request then
		-- Module:languages is only needed to name the category, so load it lazily.
		local langObj = require("Module:languages").getByCode(lang)
		-- getByCode may hand back nil (presumably for a code it does not know);
		-- in that case omit the category instead of raising while reporting a failure.
		if langObj then
			cat = {"[[Category:Requests for transliteration of " .. langObj:getCanonicalName() .. " terms]]"}
		end
	end
	return nil, true, cat
end

-- Lookup from language code to the parameter name that export.tr searches for
-- inside {{zh-pron}}. Each value is concatenated straight into a Lua pattern,
-- so pattern-magic characters must be escaped here (e.g. "-" is written "%-").
local zhpron_lect_code = {
	cdo = "md",				-- Min Dong
	cjy = "j",				-- Jin
	cmn = "m",				-- Mandarin
	gan = "g",				-- Gan
	hak = "h",				-- Hakka
	hsn = "x",				-- Xiang
	ltc = "mc",				-- Middle Chinese
	mnp = "mb",				-- Min Bei
	nan = "mn",				-- Min Nan
	och = "oc",				-- Old Chinese
	wuu = "w",				-- Wu
	yue = "c",				-- Cantonese
	zh = "m",				-- Chinese (general); uses Mandarin pinyin
	-- Hyphenated codes are not valid identifiers, so they keep bracket syntax.
	["zhx-tai"] = "c%-t",	-- Taishanese
	["zhx-teo"] = "mn%-t",	-- Teochew
}

--- Transliterate a Chinese term using the {{zh-pron}} template found in the
-- wikitext of the page whose title is `text`.
-- @param text  the term; also used as the page title to read
-- @param lang  language code; either a key of zhpron_lect_code or a code whose
--              getNonEtymologicalCode() is such a key
-- @param sc    script code; accepted but not used by this function
-- @return `text` unchanged when it is nil or empty; otherwise the
--         transliteration followed by one space; or the three values produced
--         by fail() when no unambiguous transliteration can be determined
function export.tr(text, lang, sc)
	if (not text) or text == "" then
		return text
	end
	
	-- Temporary.
	if not zhpron_lect_code[lang] then
		local lang_obj = require("Module:languages").getByCode(lang, nil, true)
		local resolved = lang_obj and lang_obj:getNonEtymologicalCode()
		-- Without a known lect code the patterns below cannot be built; report a
		-- failure instead of raising on a nil object or a nil concatenation.
		if not (resolved and zhpron_lect_code[resolved]) then
			return fail(lang)
		end
		lang = resolved
	end
	
	local title = mw.title.new(text)
	local content = title and title:getContent()
	local tr
	if lang ~= "ltc" and lang ~= "och" then
		if not content then
			return fail(lang)
		elseif lang == "cmn" or lang == "wuu" or lang == "yue" or lang == "zh" or lang == "zhx-tai" then
			-- Normalise the separator to ";": for these lects a "," not followed by a space.
			content = content:gsub(",([^ ])", ";%1")
		else
			-- For the remaining lects the separator is a "/" not followed by a space.
			content = content:gsub("/([^ ])", ";%1")
		end
	end
	
	if content then
		if lang == "ltc" or lang == "och" then
			-- All {{zh-pron}} sections must agree on the value, otherwise give up.
			local pron
			for pron_sect in content:gmatch("{{zh%-pron[^}]*|%s?" .. zhpron_lect_code[lang] .. "=[^|}\n]-([^=|}\n]+)") do
				if pron and pron ~= pron_sect then
					return fail(lang)
				end
				pron = pron_sect
			end
			tr = pron
		else
			-- A reading explicitly tagged {default} wins outright.
			tr = content:match("{{zh%-pron[^}]*|%s?" .. zhpron_lect_code[lang] .. "=[^|}\n]-;*([^;=|}\n]+){default}")
			
			if not tr then
				local lower = require("Module:string utilities").lower
				for pron_sect in content:gmatch("{{zh%-pron[^}]*|%s?" .. zhpron_lect_code[lang] .. "=([^}|\n]*[^%s}|\n][^}|\n]*)") do
					-- Drop pieces that contain "=". Collect the survivors in a new
					-- sequence: assigning nil in place would leave holes, making
					-- #prons unspecified and prons[1] possibly nil.
					local prons = {}
					for _, pron in ipairs(mw.text.split(pron_sect, ";")) do
						if not pron:match("=") then
							prons[#prons + 1] = pron
						end
					end
					
					-- A section with no usable reading leaves `tr` as it was.
					if #prons > 0 then
						if #prons > 2 then
							return fail(lang, true)
						elseif #prons > 1 then
							-- Two readings are acceptable only if they differ solely
							-- in the case of the first character.
							if prons[1]:gsub("^.", lower) == prons[2]:gsub("^.", lower) then
								prons[1] = prons[1]:gsub("^.", lower)
							else
								return fail(lang, true)
							end
						elseif tr and prons[1] ~= tr then
							-- Same tolerance when comparing against an earlier section.
							if prons[1]:gsub("^.", lower) == tr:gsub("^.", lower) then
								prons[1] = prons[1]:gsub("^.", lower)
							else
								return fail(lang, true)
							end
						end
						
						tr = prons[1]
					end
				end
				if not tr then
					return fail(lang)
				end
			end
		end
	end
	
	if lang == "cmn" or lang == "zh" then
		local Hani = require("Module:scripts").getByCode("Hani"):getCharacters()
		-- The value may itself contain Han characters; replace them using the
		-- fixed readings below and then the cmn-tag data table.
		if mw.ustring.match(tr, "[" .. Hani .. "]") then
			local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
			local tag = mw.loadData("Module:zh/data/cmn-tag").MT
			tr = tr
				:gsub("一", "yī")
				:gsub("不", "bù")
			tr = tr:gsub(UTF8_char, function(c1) return tag[c1] and tag[c1][1] or c1 end)
		end
	elseif lang == "hak" then
		-- TODO
	elseif lang == "ltc" or lang == "och" then
		if tr == "n" then return fail(lang) end
		-- `index` holds, per character of `text`, which entry of that character's
		-- data module to use ("y" selects entry 1); entries are then overwritten
		-- with the resulting transliteration.
		local index = {}
		if tr then
			if lang == "ltc" then
				index = mw.text.split(tr, ",")
			else
				index = mw.text.split(tr, ";")
			end
		end
		for i = 1, mw.ustring.len(text) do
			local module_type = lang .. "-pron"
			if lang == "och" then module_type = module_type .. "-ZS" end
			
			local success, data_module = pcall(require, "Module:zh/data/" .. module_type .. "/" .. mw.ustring.sub(text, i, i))
			
			-- Fail when the data module is missing, or when it offers several
			-- entries and no explicit choice among them was given.
			if not success or (((not index[i]) or index[i] == "y") and #data_module > 1) then return fail(lang) end
			
			if index[i] == "y" then
				index[i] = 1
			elseif index[i] then
				index[i] = tonumber(index[i])
			end
			
			index[i] = index[i] and data_module[index[i]] or data_module[1]
			
			if lang == "ltc" then
				local data = mw.loadData("Module:ltc-pron/data")
				local initial, final, tone = require("Module:ltc-pron").infer_categories(index[i])
				index[i] = data.initialConv["Zhengzhang"][initial] .. data.finalConv["Zhengzhang"][final] .. tone
			else
				index[i] = index[i][6]
			end
		end
		tr = table.concat(index, " ")
		if lang == "och" then tr = "*" .. tr end
	elseif lang == "nan" then
		-- TODO
	elseif lang == "yue" then
		tr = tr:gsub("[%d-]+", "<sup>%0</sup>")
	elseif lang == "zhx-tai" then
		tr = tr:gsub("[%d*]+%-?[%d*]*", "<sup>%0</sup>")
	elseif lang == "zhx-teo" then
		-- TODO
	else
		-- Every other lect delegates to the rom() function of its own pron module.
		tr = require("Module:" .. lang .. "-pron").rom(tr)
	end
	
	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return tr .. " "
end

-- Hand the table of public functions (export.tr) to whoever loads this module.
return export