-- Mô đun:Ancient Greek
-- Giao diện
local p = {}

-- Combining diacritics, as they appear in NFD-normalized Greek text.
local macron = mw.ustring.char(0x304)
local breve = mw.ustring.char(0x306)
local rough = mw.ustring.char(0x314)
local smooth = mw.ustring.char(0x313)
local diaeresis = mw.ustring.char(0x308)
local acute = mw.ustring.char(0x301)
local grave = mw.ustring.char(0x300)
local circumflex = mw.ustring.char(0x342)
local Latin_circumflex = mw.ustring.char(0x302)
local subscript = mw.ustring.char(0x345)
-- ustring pattern: a macron, an optional diaeresis, then a Latin circumflex.
local macron_circumflex = macron .. diaeresis .. '?' .. Latin_circumflex

-- Letters before which a bare γ is transliterated as <n>.
local is_velar = { ['κ'] = true, ['γ'] = true, ['χ'] = true, ['ξ'] = true, }

-- Byte-level pattern matching a single UTF-8 encoded character.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
local basic_Greek = "[\206-\207][\128-\191]" -- excluding first line of Greek and Coptic block: ͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ

-- Maps a character to a table of flags describing it (see add_info).
local info = {}

-- The tables are shared among different characters so that they can be checked
-- for equality if needed, and to use less space.
local vowel = { vowel = true, diacritic_seat = true }
local iota = { vowel = true, diacritic_seat = true, offglide = true }
local upsilon = { vowel = true, diacritic_seat = true, offglide = true }
-- Technically rho is only a seat for rough or smooth breathing.
local rho = { consonant = true, diacritic_seat = true }
local consonant = { consonant = true }
local diacritic = { diacritic = true }
-- Needed for equality comparisons.
local breathing = { diacritic = true }

--- Register the flag table `t` for every character in `characters`.
-- @param characters either a string (split into UTF-8 characters) or an
--   array of single-character strings
-- @param t the shared flag table to store in `info` for each character
local function add_info(characters, t)
	if type(characters) == "string" then
		for character in string.gmatch(characters, UTF8_char) do
			info[character] = t
		end
	else
		for _, character in ipairs(characters) do
			info[character] = t
		end
	end
end

add_info({ macron, breve,
		diaeresis,
		acute, grave, circumflex,
		subscript,
	}, diacritic)

add_info({rough, smooth}, breathing)

add_info("ΑΕΗΟΩαεηοω", vowel)

add_info("Ιι", iota)

add_info("Υυ", upsilon)

add_info("ΒΓΔΖΘΚΛΜΝΞΠΡΣΤΦΧΨϜϘϺϷͶϠβγδζθκλμνξπρσςτφχψϝϙϻϸͷϡ", consonant)

-- Registered after the consonants so that rho's entry is the `rho` table.
add_info("Ρρ", rho)

-- Any character without an entry gets this empty table, so flag lookups such
-- as `info[c].vowel` are always safe and simply yield nil.
local not_recognized = {}
setmetatable(info, { __index =
	function()
		return not_recognized
	end
})

--- Wrap `str` in curly double quotation marks for use in error messages.
local function quote(str)
	return "“" .. str .. "”"
end

-- Transliterations shared by both systems. System-specific entries live in
-- ALA_LC and Wiktionary_transliteration and are reached through the __index
-- metamethod installed by p.transliterate.
local correspondences = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "e" .. macron,
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "o" .. macron,

	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["ψ"] = "ps",

	-- Archaic letters
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",

	-- Diacritics
	[smooth] = '',
	[rough] = '', -- h is added below in the `transliterate` function.
	[breve] = '',
}

-- Entries used only when the system is "ALA-LC".
local ALA_LC = {
	["χ"] = "ch",
	[acute] = '',
	[grave] = '',
	[circumflex] = '',
	[subscript] = '',
	[diaeresis] = '',
	[macron] = '',
}

-- Entries used for every system other than "ALA-LC".
local Wiktionary_transliteration = {
	["χ"] = "kh",
	[circumflex] = Latin_circumflex,
	[subscript] = 'i',
}

--- Set the __index metamethod of `t`, creating a metatable if it has none.
local function add_index_metamethod(t, index_metamethod)
	local mt = getmetatable(t)
	if not mt then
		mt = {}
		setmetatable(t, mt)
	end
	mt.__index = index_metamethod
end

--- Break a word into meaningful "tokens": individual letters or diphthongs
-- together with their diacritics.
-- The original comment states this is used by [[Module:grc-accent]] and
-- [[Module:grc-pronunciation]]; NOTE(review): in this file the function is
-- local and only called from p.transliterate -- confirm.
-- @param text the string to split; it is NFD-normalized before scanning
-- @return an array of token strings
-- Raises an error when a diacritic follows a character that cannot carry it.
local function tokenize(text)
	local tokens, prev_info = {}, {}
	local token_i = 1
	local prev
	for character in string.gmatch(mw.ustring.toNFD(text), UTF8_char) do
		local curr_info = info[character]
		-- Split vowels between tokens if not a diphthong.
		if curr_info.vowel then
			if prev and (not (curr_info.offglide and prev_info.vowel)
					-- υυ → υ, υ
					-- ιυ → ι, υ
					or prev_info.offglide and curr_info == upsilon) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		elseif curr_info.diacritic then
			tokens[token_i] = (tokens[token_i] or "") .. character
			if prev_info.vowel or prev_info.diacritic then
				if character == diaeresis then
					-- Current token is vowel, vowel, possibly other diacritics,
					-- and a diaeresis.
					-- Split the current token into two:
					-- the first letter, then the second letter plus any diacritics.
					local previous_vowel, vowel_with_diaeresis = string.match(
						tokens[token_i],
						"^(" .. basic_Greek .. ")(" .. basic_Greek .. ".+)")
					if previous_vowel then
						tokens[token_i], tokens[token_i + 1] = previous_vowel, vowel_with_diaeresis
						token_i = token_i + 1
					end
				end
			elseif prev_info == rho then
				if curr_info ~= breathing then
					-- Raise instead of returning the message: the caller
					-- expects an array of tokens, not a string.
					error(string.format("The character %s cannot have the accent %s on it.", prev, "◌" .. character))
				end
			elseif prev == nil then
				-- A diacritic at the very start of the text has no letter to
				-- sit on; report that rather than failing inside quote(nil).
				error("The diacritic " .. quote("◌" .. character) .. " has no preceding character to attach to.")
			else
				error("The character " .. quote(prev) .. " cannot have a diacritic on it.")
			end
		elseif curr_info == rho then
			-- Keep rho in the current token only when that token starts with
			-- a rho and the previous character was a breathing mark.
			if prev and not (prev_info == breathing
					and info[string.match(tokens[token_i], "^" .. basic_Greek)] == rho) then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		else
			-- Any other character starts a token of its own.
			if prev then
				token_i = token_i + 1
			end
			tokens[token_i] = (tokens[token_i] or "") .. character
		end
		prev = character
		prev_info = curr_info
	end
	return tokens
end

--- Transliterate Ancient Greek text into the Latin alphabet.
-- @param text the Greek text to transliterate
-- @param system "ALA-LC" or "Wiktionary"; nil or "" is treated as
--   "Wiktionary", matching the default applied by p.translit
-- @return the transliterated string
-- Raises an error (from tokenize) when a diacritic is attached to a character
-- that cannot carry it.
function p.transliterate(text, system)
	-- Without an explicit system the Wiktionary correspondences are used, so
	-- the Wiktionary-only rules below must apply as well.
	if system == nil or system == "" then
		system = "Wiktionary"
	end

	-- Route lookups that miss in `correspondences` to the system's own table.
	add_index_metamethod(correspondences, system == "ALA-LC" and ALA_LC or Wiktionary_transliteration)

	-- A lone spacing rough breathing is transliterated directly.
	if text == '῾' then
		return 'h'
	end

	text = mw.ustring.toNFD(text)

	-- Replace semicolon or Greek question mark with regular question mark,
	-- except after an ASCII alphanumeric character (to avoid converting
	-- semicolons in HTML entities).
	text = mw.ustring.gsub(text, "([^A-Za-z0-9])[;" .. mw.ustring.char(0x37E) .. "]", "%1?")

	-- Handle the middle dot. It is equivalent to semicolon or colon, but
	-- semicolon is probably more common.
	text = text:gsub("·", ";")

	local tokens = tokenize(text)

	-- Now read the tokens. ipairs guarantees they are visited in order, which
	-- the concatenated output depends on.
	local output = {}
	for i, token in ipairs(tokens) do
		-- Substitute each character in the token for its transliteration.
		local translit = string.gsub(mw.ustring.lower(token), UTF8_char, correspondences)

		if token == 'γ' and is_velar[tokens[i + 1]] then
			-- γ before a velar should be <n>
			translit = 'n'
		elseif token == 'ρ' and tokens[i - 1] == 'ρ' then
			-- ρ after ρ should be <rh>
			translit = 'rh'
		elseif system == "Wiktionary" and mw.ustring.find(token, '^[αΑ].*' .. subscript .. '$') then
			-- add macron to ᾳ
			translit = mw.ustring.gsub(translit, '([aA])', '%1' .. macron)
		end

		-- Rough breathing: <h> follows rho but precedes a vowel.
		if token:find(rough) then
			if mw.ustring.find(token, '[Ρρ]') then
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end

		-- ALA-LC: a token starting with upsilon and containing no iota
		-- is written with <y>.
		if system == "ALA-LC" and mw.ustring.find(token, '^[υΥ][^ιΙ]*$') then
			translit = translit:gsub('u', 'y'):gsub('U', 'Y')
		end

		-- Remove macron from a vowel that has a circumflex.
		if mw.ustring.find(translit, macron_circumflex) then
			translit = translit:gsub(macron, '')
		end

		-- Capitalize first character of transliteration.
		if token ~= mw.ustring.lower(token) then
			translit = mw.ustring.gsub(translit, "^.", mw.ustring.upper)
		end

		table.insert(output, translit)
	end

	return table.concat(output)
end

--- Template entry point: transliterate the first argument and wrap the
-- result in an italicized span tagged lang="grc-Latn".
-- Reads the text from the frame's first argument, falling back to the parent
-- frame's, and the system from |system= (default "Wiktionary").
-- Raises an error when |system= is neither "ALA-LC" nor "Wiktionary".
function p.translit(frame)
	local text = frame.args[1] or frame:getParent().args[1]
	local system = frame.args.system
	if system == nil or system == "" then
		system = "Wiktionary"
	elseif not (system == "ALA-LC" or system == "Wiktionary") then
		error('Transliteration system in |system= not recognized; choose between "ALA-LC" and "Wiktionary"')
	end
	local transliteration = p.transliterate(text, system)
	return '<span title="Ancient Greek transliteration" lang="grc-Latn"><i>' .. transliteration .. '</i></span>'
end

--- Template entry point: transliterate the first argument (or the parent
-- frame's first argument) and return it without any HTML wrapper.
function p.bare_translit(frame)
	return p.transliterate(frame.args[1] or frame:getParent().args[1])
end

return p