-- Modul:languages/data
-- Documentation for this module can be created at Modul:languages/data/dok
-- Short alias for mw.ustring.char; used throughout this file to turn Unicode
-- code points into strings.
local u = mw.ustring.char
-- Table returned at the end of this module. The `chars`, `puaChars` and
-- `shared` fields are attached to it further down.
local export = {}
--[=[
NOTE: Everything from here down to the closing long bracket at the end of this
block is one single comment, so the code shown inside it is disabled and never
runs. It appears to be kept for reference only.

Here is a list of the language fields by order of frequency according to [[User:Erutuon/language_stuff]].
If the order changes, change the order here for potentially greater efficiency.

local fields = {
	"canonical_name",
	"wikidata_item",
	"family",
	"scripts",
	"other_names",
	"ancestors",
	"type",
	"translit",
	"entry_name",
	"sort_key",
	"override_translit",
	"wikimedia_codes",
	"standard_chars",
	"wikipedia_article",
	"link_tr",
}

Insert the fields into the table with their values as their frequency ranking.
{export.most_common_field = 1, export.second_most_common_field = 2, ... }

for i, field in ipairs(fields) do
	export[field] = i
end
]=]
-- Strings for some commonly-used diacritics and other special characters,
-- keyed by a short name. The code points are listed once and converted to
-- strings with `u` in a single pass.
local c = {}
do
	local codepoints = {
		-- U+0300 .. U+0361
		grave = 0x0300,
		acute = 0x0301,
		circ = 0x0302,
		tilde = 0x0303,
		macron = 0x0304,
		overline = 0x0305,
		breve = 0x0306,
		dotabove = 0x0307,
		diaer = 0x0308,
		ringabove = 0x030A,
		dacute = 0x030B,
		caron = 0x030C,
		lineabove = 0x030D,
		dgrave = 0x030F,
		invbreve = 0x0311,
		commaabove = 0x0313,
		revcommaabove = 0x0314,
		dotbelow = 0x0323,
		diaerbelow = 0x0324,
		ringbelow = 0x0325,
		cedilla = 0x0327,
		ogonek = 0x0328,
		brevebelow = 0x032E,
		macronbelow = 0x0331,
		perispomeni = 0x0342,
		ypogegrammeni = 0x0345,
		CGJ = 0x034F, -- combining grapheme joiner
		dbrevebelow = 0x035C,
		dinvbreve = 0x0361,
		-- U+0484 .. U+0486
		kamora = 0x0484,
		dasiapneumata = 0x0485,
		psilipneumata = 0x0486,
		-- U+0640 .. U+0670
		kashida = 0x0640,
		fathatan = 0x064B,
		dammatan = 0x064C,
		kasratan = 0x064D,
		fatha = 0x064E,
		damma = 0x064F,
		kasra = 0x0650,
		shadda = 0x0651,
		sukun = 0x0652,
		hamzaabove = 0x0654,
		nunghunna = 0x0658,
		smallv = 0x065A,
		superalef = 0x0670,
		-- U+1FBD .. U+FE00
		psili = 0x1FBD,
		coronis = 0x1FBF,
		ZWNJ = 0x200C, -- zero width non-joiner
		ZWJ = 0x200D, -- zero width joiner
		RSQuo = 0x2019, -- right single quote
		VS01 = 0xFE00, -- variation selector 1
	}
	for name, codepoint in pairs(codepoints) do
		c[name] = u(codepoint)
	end
end
-- Punctuation to be used for standardChars field.
c.punc = " !#%&*+,-./:;<=>?@^_`|~\'()∅"
export.chars = c
-- PUA characters, generally used in sortkeys: p[1] is U+F000, p[2] is U+F001,
-- and so on.
-- Note: if the limit needs to be increased, do so in powers of 2 (due to the
-- way memory is allocated for tables).
local p = {}
do
	local PUA_BASE = 0xF000
	local PUA_COUNT = 32
	for offset = 0, PUA_COUNT - 1 do
		p[offset + 1] = u(PUA_BASE + offset)
	end
end
export.puaChars = p
-- Shared substitution data, keyed by "<code>-<purpose>".
-- These values are placed here to make it possible to synchronise a group of
-- languages without the need for a dedicated function module.
local s = {}
do
	-- Diacritics stripped from both the Cyrillic and the Latin "cau" entry names.
	local cau_diacritics = c.grave .. c.acute .. c.macron
	local cau_display = {
		from = {"[IlІӀ]", "ᴴ"},
		to = {"ӏ", "ᵸ"},
	}
	s["cau-Cyrl-displaytext"] = cau_display
	-- The entry-name data reuses the very same `from`/`to` tables as the
	-- display-text data (shared by reference, not copied).
	s["cau-Cyrl-entryname"] = {
		remove_diacritics = cau_diacritics,
		from = cau_display.from,
		to = cau_display.to,
	}
	s["cau-Latn-entryname"] = {remove_diacritics = cau_diacritics}
end
s["Cyrs-entryname"] = {
	remove_diacritics = table.concat({
		c.grave, c.acute, c.diaer, c.kamora, c.dasiapneumata, c.psilipneumata,
	}),
}
do
	-- Each row is {pattern, replacement}. The rows are split into the parallel
	-- `from` and `to` lists below, keeping this exact order.
	local mapping = {
		-- 2 chars
		{"ї", "и" .. p[1]},
		{"оу", "у"},
		-- 1 char
		{"ґ", "г" .. p[1]},
		{"ꙣ", "д" .. p[1]},
		{"є", "е"},
		{"[ѕꙃꙅ]", "ж" .. p[1]},
		{"ꙁ", "з"},
		{"[іꙇ]", "и" .. p[1]},
		{"[ђꙉ]", "и" .. p[2]},
		{"[ѻꙩꙫꙭꙮꚙꚛ]", "о"},
		{"ꙋ", "у"},
		{"[ѡѿꙍѽ]", "х" .. p[1]},
		{"ꙑ", "ы"},
		{"ѣ", "ь" .. p[1]},
		{"ꙗ", "ь" .. p[2]},
		{"ѥ", "ь" .. p[3]},
		{"ꙕ", "ю"},
		{"[ѧꙙ]", "я"},
		{"[ѩꙝ]", "я" .. p[1]},
		{"ꙛ", "я" .. p[2]},
		{"ѫ", "я" .. p[3]},
		{"ѭ", "я" .. p[4]},
		{"ѯ", "я" .. p[5]},
		{"ѱ", "я" .. p[6]},
		{"ѳ", "я" .. p[7]},
		{"ѵ", "я" .. p[8]},
		{"ҁ", "я" .. p[9]},
	}
	local from, to = {}, {}
	for i, pair in ipairs(mapping) do
		from[i], to[i] = pair[1], pair[2]
	end
	s["Cyrs-sortkey"] = {from = from, to = to}
end
-- Greek sort keys: the listed combining marks are given as the set to remove,
-- and "ς" is mapped to "σ".
s["Grek-sortkey"] = {
	remove_diacritics = table.concat({
		c.grave,
		c.acute,
		c.diaer,
		c.caron,
		c.commaabove,
		c.revcommaabove,
		c.diaerbelow,
		c.brevebelow,
		c.perispomeni,
		c.ypogegrammeni,
	}),
	from = {"ς"},
	to = {"σ"},
}
-- Character set of the "Hani" script as reported by Module:scripts, and the
-- same set wrapped in a Lua pattern character class.
local HaniChars = require("Module:scripts").getByCode("Hani"):getCharacters()
local hani_class = "[" .. HaniChars .. "]"
s["Kore-entryname"] = {
	remove_diacritics = u(0x302E) .. u(0x302F),
	from = {
		-- A Hani character followed by a parenthesised group.
		"(" .. hani_class .. ")%(.-%)",
		-- A hyphen between two characters.
		"(.)%-(.)",
		-- A parenthesised run of Hani characters.
		"%(" .. hani_class .. "+%)",
	},
	-- NOTE(review): `to` deliberately has one entry fewer than `from`;
	-- presumably the consumer treats a missing replacement as "" - confirm
	-- against the module that reads this table.
	to = {"%1", "%1%2"},
}
-- Display-text substitutions for the "Mong" key: three patterns in `from` and
-- three replacements in `to` (the last pattern is anchored at end of string).
-- NOTE(review): presumably `from[i]` is replaced by `to[i]` in list order by
-- the consuming module - confirm there.
s["Mong-displaytext"] = {
from = {"([ᠨ-ᡂᡸ])ᠶ([ᠨ-ᡂᡸ])", "([ᠠ-ᡂᡸ])ᠸ([^ᠠ-ᠧ])", "([ᠠ-ᡂᡸ])ᠸ$"},
to = {"%1ᠢ%2", "%1ᠧ%2", "%1ᠧ"}
}
-- The entry-name data is the same table object as the display-text data,
-- not a copy.
s["Mong-entryname"] = s["Mong-displaytext"]
do
	-- Marks (plus the plain apostrophe) given as the set to remove for
	-- "roa-oil" sort keys.
	local stripped = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove .. c.cedilla .. "'"
	-- Ligatures and the two-letter sequences they are replaced with.
	local ligatures = {"æ", "œ"}
	local expansions = {"ae", "oe"}
	s["roa-oil-sortkey"] = {
		remove_diacritics = stripped,
		from = ligatures,
		to = expansions,
	}
end
-- Display-text substitutions for the "Tibt" key: eleven source strings in
-- `from` and eleven replacements in `to`, listed in matching order.
-- NOTE(review): presumably `from[i]` is replaced by `to[i]` in list order by
-- the consuming module - confirm there.
s["Tibt-displaytext"] = {
from = {"ༀ", "༌", "།།", "༚༚", "༚༝", "༝༚", "༝༝", "ཷ", "ཹ", "ེེ", "ོོ"},
to = {"ཨོཾ", "་", "༎", "༛", "༟", "࿎", "༞", "ྲཱྀ", "ླཱྀ", "ཻ", "ཽ"}
}
-- The entry-name data is the same table object as the display-text data,
-- not a copy.
s["Tibt-entryname"] = s["Tibt-displaytext"]
-- Expose the shared substitution tables and return the module table.
export.shared = s
return export