Bước tới nội dung

Mô đun:Text

Trang mô đun bị khóa vô hạn
Bách khoa toàn thư mở Wikipedia

local Text = { serial = "2024-06-05",
               suite  = "Text",
               item   = 29387871 }
--[=[
Text utilities

Library of string helpers for Scribunto.  The table `Text` carries the
Lua API; the table `p` (defined near the end) carries the #invoke
wrappers and is the value returned by this module.
]=]
local Failsafe    = Text
local GlobalMod   = Text
-- Lazily built ustring patterns, keyed by purpose
local Patterns    = { }
-- Lazily built list of { first, last } codepoint ranges
local RangesLatin = false
-- Lazily built string of all known quotation mark characters
local SeekQuote   = false



local foreignModule = function ( access, advanced, append, alt, alert )
    -- Fetch global module
    -- Precondition:
    --     access    -- string, with name of base module
    --     advanced  -- true, for require(); else mw.loadData()
    --     append    -- string, with subpage part, if any; or false
    --     alt       -- number, of wikidata item of root; or false
    --     alert     -- true, for throwing error on data problem
    -- Postcondition:
    --     Returns whatever, probably table
    --     If loading failed and alert is not set, the value returned
    --     is whatever pcall() delivered (error message) or nil
    -- 2019-10-29
    local storage = access
    local finer = function ()
                      -- Extend current page name by subpage, if any
                      if append then
                          storage = string.format( "%s/%s",
                                                   storage,
                                                   append )
                      end
                  end
    local fun, lucky, r, suited
    if advanced then
        fun = require
    else
        fun = mw.loadData
    end
    GlobalMod.globalModules = GlobalMod.globalModules or { }
    suited = GlobalMod.globalModules[ access ]
    if not suited then
        -- No sitelink cached yet: try local page name first
        finer()
        lucky, r = pcall( fun,  "Module:" .. storage )
    end
    if not lucky then
        if not suited  and
           type( alt ) == "number"  and
           alt > 0 then
            suited = string.format( "Q%d", alt )
            suited = mw.wikibase.getSitelink( suited )
            -- Cache sitelink, or true if there is none
            GlobalMod.globalModules[ access ] = suited or true
        end
        if type( suited ) == "string" then
            storage = suited
            finer()
            lucky, r = pcall( fun, storage )
        end
        if not lucky and alert then
            error( "Missing or invalid page: " .. storage, 0 )
        end
    end
    return r
end -- foreignModule()



local function factoryQuote()
    -- Create quote definitions
    --     Fills Text.quoteLang and Text.quoteType once; guarantees
    --     that an entry for "en" is present in both
    if not Text.quoteLang then
        local quoting = foreignModule( "Text",
                                       false,
                                       "quoting",
                                       Text.item )
        if type( quoting ) == "table" then
            Text.quoteLang = quoting.langs
            Text.quoteType = quoting.types
        end
        if type( Text.quoteLang ) ~= "table" then
            Text.quoteLang = { }
        end
        if type( Text.quoteType ) ~= "table" then
            Text.quoteType = { }
        end
        if type( Text.quoteLang.en ) ~= "string" then
            Text.quoteLang.en = "ld"
        end
        if type( Text.quoteType[ Text.quoteLang.en ] ) ~= "table" then
            -- level 1: U+201C U+201D; level 2: U+2018 U+2019
            Text.quoteType[ Text.quoteLang.en ] = { { 8220, 8221 },
                                                    { 8216, 8217 } }
        end
    end
end -- factoryQuote()



local function fiatQuote( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code
    --     advance  -- number, with level 1 or 2
    -- Returns: string; apply unchanged if no quote definition found
    local r = apply
    local quotes, suite
    factoryQuote()
    if alien then
        suite = mw.text.trim( alien )
        if suite == "" then
            suite = false
        else
            local s = Text.quoteLang[ suite ]
            if s then
                suite = s
            else
                -- Retry with primary subtag of "xx-yy"
                local slang = suite:match( "^(%l+)-" )
                if slang then
                    suite = Text.quoteLang[ slang ]
                end
            end
        end
    end
    if not suite then
        suite = Text.quoteLang.en
    end
    quotes = Text.quoteType[ suite ]
    if quotes then
        local space
        if quotes[ 3 ] then
            -- Third element requests padding inside the quotes
            space = "&#160;"
        else
            space = ""
        end
        quotes = quotes[ advance ]
        if quotes then
            r = mw.ustring.format( "%s%s%s%s%s",
                                   mw.ustring.char( quotes[ 1 ] ),
                                   space,
                                   apply,
                                   space,
                                   mw.ustring.char( quotes[ 2 ] ) )
        end
    else
        mw.log( "fiatQuote() " .. suite )
    end
    return r
end -- fiatQuote()



Text.char = function ( apply, again, accept )
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string
    local r
    if type( apply ) == "table" then
        local bad   = { }
        local codes = { }
        local s
        for k, v in pairs( apply ) do
            -- s keeps a true value as long as v is not acceptable
            s = type( v )
            if s == "number" then
                if v < 32  and  v ~= 9  and  v ~= 10 then
                    -- Control codes other than TAB and LF are rejected
                    v = tostring( v )
                else
                    v = math.floor( v )
                    s = false
                end
            elseif s ~= "string" then
                v = tostring( v )
            end
            if s then
                table.insert( bad, v )
            else
                table.insert( codes, v )
            end
        end -- for k, v
        if #bad == 0 then
            if #codes > 0 then
                r = mw.ustring.char( unpack( codes ) )
                if again then
                    if type( again ) == "number" then
                        local n = math.floor( again )
                        if n > 1 then
                            r = r:rep( n )
                        elseif n < 1 then
                            r = ""
                        end
                    else
                        s = "bad repetitions: " .. tostring( again )
                    end
                end
            end
        else
            s = "bad codepoints: " .. table.concat( bad, " " )
        end
        if s  and  not accept then
            r = tostring(  mw.html.create( "span" )
                                  :addClass( "error" )
                                  :wikitext( s ) )
        end
    end
    return r or ""
end -- Text.char()



Text.concatParams = function ( args, apply, adapt )
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local collect = { }
    for k, v in pairs( args ) do
        if type( k ) == "number" then
            v = mw.text.trim( v )
            if v ~= "" then
                if adapt then
                    v = mw.ustring.format( adapt, v )
                end
                table.insert( collect, v )
            end
        end
    end -- for k, v
    return table.concat( collect,  apply or "|" )
end -- Text.concatParams()



Text.containsCJK = function ( analyse )
    -- Is any CJK code within?
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if CJK detected
    local r
    if not Patterns.CJK then
        -- Character class [U+3400-U+9FFF U+20000-U+2B81F]
        Patterns.CJK = mw.ustring.char( 91,
                                        0x3400, 45,  0x9FFF,
                                        0x20000, 45, 0x2B81F,
                                        93 )
    end
    if mw.ustring.find( analyse, Patterns.CJK ) then
        r = true
    else
        r = false
    end
    return r
end -- Text.containsCJK()



Text.getPlain = function ( adjust )
    -- Remove wikisyntax from string, except templates
    --     Drops HTML comments, tags, bold/italic apostrophes,
    --     turns &nbsp; into a space, and unstrips the result
    -- Parameter:
    --     adjust  -- string
    -- Returns: string
    local i = adjust:find( "<!--", 1, true )
    local r = adjust
    local j
    while i do
        j = r:find( "-->",  i + 3,  true )
        if j then
            -- Cut the comment including its leading "<"
            r = r:sub( 1,  i - 1 )  ..  r:sub( j + 3 )
        else
            -- Unterminated comment: drop everything from "<!--"
            r = r:sub( 1,  i - 1 )
        end
        i = r:find( "<!--", i, true )
    end    -- "<!--"
    r = r:gsub( "(</?%l[^>]*>)", "" )
         :gsub( "'''(.+)'''", "%1" )
         :gsub( "''(.+)''", "%1" )
         :gsub( "&nbsp;", " " )
    return mw.text.unstrip( r )
end -- Text.getPlain()



Text.isLatinRange = function ( adjust )
    -- Are characters expected to be latin or symbols within latin texts?
    -- Precondition:
    --     adjust  -- string, or nil for initialization
    -- Returns: true, if valid for latin only
    --          false, if any other character present
    --          nil, if adjust is nil (patterns are initialized only)
    local r
    if not RangesLatin then
        RangesLatin = { {   0x07, 0x02AF },
                        { 0x1D6B, 0x1D9A },
                        { 0x1E00, 0x1EFF },
                        { 0x2002, 0x203A },
                        { 0x2190, 0x23BD } }
    end
    if not Patterns.Latin then
        -- Build "^[a-b c-d ...]*$" from the ranges
        local range
        Patterns.Latin = "^["
        for i = 1, #RangesLatin do
            range = RangesLatin[ i ]
            Patterns.Latin = Patterns.Latin ..
                           mw.ustring.char( range[ 1 ], 45, range[ 2 ] )
        end    -- for i
        Patterns.Latin = Patterns.Latin .. "]*$"
    end
    if adjust then
        if mw.ustring.match( adjust, Patterns.Latin ) then
            r = true
        else
            r = false
        end
    end
    return r
end -- Text.isLatinRange()



Text.isQuote = function ( ask )
    -- Is this character any quotation mark?
    -- Parameter:
    --     ask  -- string, with single character
    -- Returns: true, if ask is quotation mark
    local r
    if not SeekQuote then
        SeekQuote = mw.ustring.char(   34,       -- "
                                       39,       -- '
                                      171,       -- laquo
                                      187,       -- raquo
                                     8216,       -- lsquo
                                     8217,       -- rsquo
                                     8218,       -- sbquo
                                     8220,       -- ldquo
                                     8221,       -- rdquo
                                     8222,       -- bdquo
                                     8249,       -- lsaquo
                                     8250,       -- rsaquo
                                     0x300C,     -- CJK
                                     0x300D,     -- CJK
                                     0x300E,     -- CJK
                                     0x300F )    -- CJK
    end
    if ask == "" then
        r = false
    elseif mw.ustring.find( SeekQuote, ask, 1, true ) then
        -- Plain search of ask within the list of quotation marks
        r = true
    else
        r = false
    end
    return r
end -- Text.isQuote()



Text.listToText = function ( args, adapt )
    -- Format list items similar to mw.text.listToText()
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    local collect = { }
    for k, v in pairs( args ) do
        if type( k ) == "number" then
            v = mw.text.trim( v )
            if v ~= "" then
                if adapt then
                    v = mw.ustring.format( adapt, v )
                end
                table.insert( collect, v )
            end
        end
    end -- for k, v
    return mw.text.listToText( collect )
end -- Text.listToText()



Text.quote = function ( apply, alien, advance )
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    local mode, slang
    if type( alien ) == "string" then
        slang = mw.text.trim( alien ):lower()
    else
        -- No language given: page language, else content language
        local pageLang = mw.title.getCurrentTitle().pageLanguage
        if pageLang then
            slang = pageLang.code
        else
            slang = mw.language.getContentLanguage():getCode()
        end
    end
    if advance == 2 then
        mode = 2
    else
        mode = 1
    end
    return fiatQuote( mw.text.trim( apply ), slang, mode )
end -- Text.quote()



Text.quoteUnquoted = function ( apply, alien, advance )
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim( apply )
    local s = mw.ustring.sub( r, 1, 1 )
    if s ~= ""  and  not Text.isQuote( s ) then
        -- Last character; the end index defaults to end of string
        s = mw.ustring.sub( r, -1 )
        if not Text.isQuote( s ) then
            r = Text.quote( r, alien, advance )
        end
    end
    return r
end -- Text.quoteUnquoted()



Text.removeDiacritics = function ( adjust )
    -- Remove all diacritics
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    local cleanup, decomposed
    if not Patterns.Combined then
        -- Character class of combining mark blocks
        Patterns.Combined = mw.ustring.char( 91,
                                             0x0300, 45, 0x036F,
                                             0x1AB0, 45, 0x1AFF,
                                             0x1DC0, 45, 0x1DFF,
                                             0xFE20, 45, 0xFE2F,
                                             93 )
    end
    decomposed = mw.ustring.toNFD( adjust )
    cleanup    = mw.ustring.gsub( decomposed, Patterns.Combined, "" )
    return mw.ustring.toNFC( cleanup )
end -- Text.removeDiacritics()



Text.removeWhitespace = function ( adjust )
    -- Remove all whitespace, or replace with ASCII space
    --     Invisible formatting characters are deleted,
    --     special space characters become U+0020,
    --     and the result is trimmed
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; modified
    local r = mw.text.decode( adjust )
    if r:find( "&", 1, true ) then
        r = r:gsub( "&lrm;", "" )
             :gsub( "&rlm;", "" )
             :gsub( "&zwj;", "" )
             :gsub( "&zwnj;", "" )
             :gsub( "&thinsp;", " " )
             :gsub( "&ensp;", " " )
             :gsub( "&emsp;", " " )
    end
    if not Patterns.Whitespace then
        -- Each pattern is ONE character class, so that any single
        -- listed character is matched wherever it occurs
        Patterns.Whitespace = mw.ustring.char( 91,
                                               0x00AD,
                                               0x200C, 45, 0x200F,
                                               0x2028, 45, 0x202E,
                                               0x2060,
                                               93 )
        -- U+205F is a visible space; it is replaced, not deleted
        Patterns.Space      = mw.ustring.char( 91,
                                               0x00A0,
                                               0x1680,
                                               0x2000, 45, 0x200A,
                                               0x202F,
                                               0x205F,
                                               0x3000,
                                               0x303F,
                                               93 )
    end
    r = mw.ustring.gsub( r, Patterns.Whitespace, "" )
    r = mw.ustring.gsub( r, Patterns.Space, " " )
    return mw.text.trim( r )
end -- Text.removeWhitespace()



Text.sentenceTerminated = function ( analyse )
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated
    local r  = mw.text.trim( analyse )
    local lt = r:find( "<", 1, true )
    if not Patterns.Terminated then
        -- Terminator, followed by closing quotes or brackets only
        Patterns.Terminated = mw.ustring.char( 91,
                                               0x3002,
                                               0xFF01,
                                               0xFF0E,
                                               0xFF1F )
                                           .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
    end
    if lt then
        r = r:gsub( "</span>", "" )
    end
    if mw.ustring.find( r, Patterns.Terminated ) then
        r = true
    elseif lt then
        -- Text direction wrapper: terminated, if the wrapped
        -- direction is opposite to the content language
        local s = "<bdi[^>]* dir=\"([lr]t[rl])\".+</bdi></bdo>"
        s = r:match( s )
        if s then
            if mw.language.getContentLanguage():isRTL() then
                r = ( s == "ltr" )
            else
                r = ( s == "rtl" )
            end
        else
            r = false
        end
    else
        r = false
    end
    return r
end -- Text.sentenceTerminated()



Text.tokenWords = function ( adjust )
    -- Split text in words of digits or letters
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with each run of punctuation or whitespace
    --          replaced by one single ASCII space
    local r = mw.uri.decode( adjust, "WIKI" )
    if r:find( "&", 1, true ) then
        r = mw.text.decode( r )
    end
    r = Text.removeWhitespace( r )
    r = mw.ustring.gsub( r, "[%p%s]+", " " )
    return r
end -- Text.tokenWords()



Text.ucfirstAll = function ( adjust )
    -- Capitalize all words
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with all first letters in upper case
    local r = " " .. adjust    -- leading space lets "%W%l" match word 1
    local i = 1
    local c, j, m
    if adjust:find( "&" ) then
        -- Protect named entities against capitalization
        r = r:gsub( "&amp;",      "&#38;" )
             :gsub( "&lt;",       "&#60;" )
             :gsub( "&gt;",       "&#62;" )
             :gsub( "&nbsp;",    "&#160;" )
             :gsub( "&thinsp;", "&#8201;" )
             :gsub( "&zwnj;",   "&#8204;" )
             :gsub( "&zwj;",    "&#8205;" )
             :gsub( "&lrm;",    "&#8206;" )
             :gsub( "&rlm;",    "&#8207;" )
        m = true
    end
    while i do
        i = mw.ustring.find( r, "%W%l", i )
        if i then
            j = i + 1
            c = mw.ustring.upper( mw.ustring.sub( r, j, j ) )
            r = string.format( "%s%s%s",
                               mw.ustring.sub( r, 1, i ),
                               c,
                               mw.ustring.sub( r, i + 2 ) )
            i = j
        end
    end -- while i
    r = r:sub( 2 )    -- drop the leading space again
    if m then
        r = r:gsub(     "&#38;", "&amp;" )
             :gsub(     "&#60;", "&lt;" )
             :gsub(     "&#62;", "&gt;" )
             :gsub(    "&#160;", "&nbsp;" )
             :gsub(   "&#8201;", "&thinsp;" )
             :gsub(   "&#8204;", "&zwnj;" )
             :gsub(   "&#8205;", "&zwj;" )
             :gsub(   "&#8206;", "&lrm;" )
             :gsub(   "&#8207;", "&rlm;" )
             :gsub( "&#X(%x+);", "&#x%1;" )
    end
    return r
end -- Text.ucfirstAll()



Text.uprightNonlatin = function ( adjust )
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>
    local r
    Text.isLatinRange()    -- initialize RangesLatin and Patterns.Latin
    if mw.ustring.match( adjust, Patterns.Latin ) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        local c
        local e = mw.html.create( "span" )
        local j = false    -- start index of current non-latin part
        local k = 1        -- start index of text not yet copied to r
        local m = false    -- index marker for single greek letter
        local n = mw.ustring.len( adjust )
        local p            -- format string: "%s%s" + span around "%s"
        local flat = function ( a )
                  -- isLatin
                  local range
                  for i = 1, #RangesLatin do
                      range = RangesLatin[ i ]
                      if a >= range[ 1 ]  and  a <= range[ 2 ] then
                          return true
                      end
                  end    -- for i
              end -- flat()
        local focus = function ( a )
                  -- char is not ambivalent
                  local r = ( a > 64 )
                  if r then
                      r = ( a < 8192  or  a > 8212 )
                  else
                      r = ( a == 38  or  a == 60 )    -- '&' '<'
                  end
                  return r
              end -- focus()
        local form = function ( a )
                -- Result so far, plus latin part k...j-1,
                -- plus non-latin part j...a within span
                return string.format( p,
                                      r,
                                      mw.ustring.sub( adjust, k, j - 1 ),
                                      mw.ustring.sub( adjust, j, a ) )
              end -- form()
        e:attr( "dir", "auto" )
         :css( "font-style", "normal" )
         :wikitext( "%s" )
        p = "%s%s" .. tostring( e )
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint( adjust, i, i )
            if focus( c ) then
                if flat( c ) then
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- Keep trailing spaces and opening
                            -- parentheses outside of the span
                            local nx = i - 1
                            local s  = ""
                            for ix = nx, 1, -1 do
                                c = mw.ustring.sub( adjust, ix, ix )
                                if c == " "  or  c == "(" then
                                    nx = nx - 1
                                    s  = c .. s
                                else
                                    break -- for ix
                                end
                            end -- for ix
                            r = form( nx ) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    j = i
                    if c >= 880  and  c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                m = m + 1
            end
        end    -- for i
        if j  and  ( not m  or  m < n ) then
            r = form( n )
        else
            r = r .. mw.ustring.sub( adjust, k )
        end
    end
    return r
end -- Text.uprightNonlatin()



Failsafe.failsafe = function ( atleast )
    -- Retrieve versioning and check for compliance
    -- Precondition:
    --     atleast  -- string, with required version
    --                         or wikidata|item|~|@ or false
    -- Postcondition:
    --     Returns  string  -- with queried version/item, also if problem
    --              false   -- if appropriate
    -- 2024-03-01
    local since  = atleast
    local last   = ( since == "~" )
    local linked = ( since == "@" )
    local link   = ( since == "item" )
    local r
    if last  or  link  or  linked  or  since == "wikidata" then
        local item = Failsafe.item
        since = false
        if type( item ) == "number"  and  item > 0 then
            local suited = string.format( "Q%d", item )
            if link then
                r = suited
            else
                local entity = mw.wikibase.getEntity( suited )
                if type( entity ) == "table" then
                    local seek = Failsafe.serialProperty or "P348"
                    local vsn  = entity:formatPropertyValues( seek )
                    if type( vsn ) == "table"  and
                       type( vsn.value ) == "string"  and
                       vsn.value ~= "" then
                        if last  and  vsn.value == Failsafe.serial then
                            r = false
                        elseif linked then
                            if mw.title.getCurrentTitle().prefixedText
                               ==  mw.wikibase.getSitelink( suited ) then
                                r = false
                            else
                                r = suited
                            end
                        else
                            r = vsn.value
                        end
                    end
                end
            end
        elseif link then
            r = false
        end
    end
    if type( r ) == "nil" then
        -- Plain string comparison of version identifiers
        if not since  or  since <= Failsafe.serial then
            r = Failsafe.serial
        else
            r = false
        end
    end
    return r
end -- Failsafe.failsafe()



Text.test = function ( about )
    -- Expose internal data for testing
    -- Parameter:
    --     about  -- string; "quote" for the quote definitions
    -- Returns: table, or nil if about is not supported
    local r
    if about == "quote" then
        factoryQuote()
        r = { QuoteLang = Text.quoteLang,
              QuoteType = Text.quoteType }
    end
    return r
end -- Text.test()



-- Export
local p = { }



function p.char( frame )
    -- #invoke wrapper for Text.char()
    --     1       -- whitespace separated codepoints; "x" prefix
    --                for hexadecimal
    --     *       -- number of repetitions
    --     errors  -- "0" to suppress error messages
    --     Template parameters are used, if the template provides 1;
    --     otherwise the #invoke parameters
    local params = frame:getParent().args
    local story = params[ 1 ]
    local codes, lenient, multiple
    if not story then
        params = frame.args
        story  = params[ 1 ]
    end
    if story then
        local items = mw.text.split( story, "%s+" )
        if #items > 0 then
            local j
            lenient  = ( params.errors == "0" )
            codes    = { }
            multiple = tonumber( params[ "*" ] )
            -- items is a sequence; ipairs() guarantees the order
            for k, v in ipairs( items ) do
                if v:sub( 1, 1 ) == "x" then
                    j = tonumber( "0" .. v )
                elseif v == "" then
                    v = false
                else
                    j = tonumber( v )
                end
                if v then
                    -- Unparsable items are passed as string,
                    -- to be reported by Text.char()
                    table.insert( codes,  j or v )
                end
            end -- for k, v
        end
    end
    return Text.char( codes, multiple, lenient )
end



function p.concatParams( frame )
    -- #invoke wrapper for Text.concatParams()
    --     template=1 selects the template parameters as list
    local args
    local template = frame.args.template
    if type( template ) == "string" then
        template = mw.text.trim( template )
        template = ( template == "1" )
    end
    if template then
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.concatParams( args,
                              frame.args.separator,
                              frame.args.format )
end



function p.containsCJK( frame )
    -- Returns "1" if CJK detected, else empty string
    return Text.containsCJK( frame.args[ 1 ] or "" ) and "1" or ""
end



function p.getPlain( frame )
    return Text.getPlain( frame.args[ 1 ] or "" )
end



function p.isLatinRange( frame )
    -- Returns "1" if latin only, else empty string
    return Text.isLatinRange( frame.args[ 1 ] or "" ) and "1" or ""
end



function p.isQuote( frame )
    -- Returns "1" if quotation mark, else empty string
    return Text.isQuote( frame.args[ 1 ] or "" ) and "1" or ""
end



function p.listToFormat(frame)
    -- Merge several separated lists by means of a format string
    --     1, 2, ...  -- lists, items separated by sep
    --     format     -- string with one "%s" per list
    --     sep        -- separator pattern (default: ";")
    -- Returns: string; the format applied once per item index
    local lists = {}
    local pformat = frame.args["format"]
    local sep = frame.args["sep"] or ";"

    -- Parse parameters: lists
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v end
    end

    -- Split lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end

    -- Generate result string
    local collect = {}
    local result_line
    for i = 1, maxListLen do
        result_line = pformat
        for j = 1, #lists do
            -- Shorter lists contribute an empty string
            local item = lists[ j ][ i ]  or  ""
            -- Function replacement inserts the item literally,
            -- even if it contains "%"
            result_line = mw.ustring.gsub( result_line,
                                           "%%s",
                                           function ()
                                               return item
                                           end,
                                           1 )
        end
        collect[ i ] = result_line
    end

    return table.concat( collect )
end



function p.listToText( frame )
    -- #invoke wrapper for Text.listToText()
    --     template=1 selects the template parameters as list
    local args
    local template = frame.args.template
    if type( template ) == "string" then
        template = mw.text.trim( template )
        template = ( template == "1" )
    end
    if template then
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.listToText( args, frame.args.format )
end



function p.quote( frame )
    -- #invoke wrapper for Text.quote()
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level 1 or 2 (optional)
    local slang = frame.args[2]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        if slang == "" then
            slang = false
        end
    end
    return Text.quote( frame.args[ 1 ] or "",
                       slang,
                       tonumber( frame.args[3] ) )
end



function p.quoteUnquoted( frame )
    -- #invoke wrapper for Text.quoteUnquoted()
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level 1 or 2 (optional)
    local slang = frame.args[2]
    if type( slang ) == "string" then
        slang = mw.text.trim( slang )
        if slang == "" then
            slang = false
        end
    end
    return Text.quoteUnquoted( frame.args[ 1 ] or "",
                               slang,
                               tonumber( frame.args[3] ) )
end



function p.removeDiacritics( frame )
    return Text.removeDiacritics( frame.args[ 1 ] or "" )
end



function p.sentenceTerminated( frame )
    -- Returns "1" if sentence terminated, else empty string
    return Text.sentenceTerminated( frame.args[ 1 ] or "" ) and "1" or ""
end



function p.tokenWords( frame )
    return Text.tokenWords( frame.args[ 1 ] or "" )
end



function p.ucfirstAll( frame )
    return Text.ucfirstAll( frame.args[ 1 ] or "" )
end



function p.unstrip( frame )
    return mw.text.trim( mw.text.unstrip( frame.args[ 1 ] or "" ) )
end



function p.uprightNonlatin( frame )
    return Text.uprightNonlatin( frame.args[ 1 ] or "" )
end



function p.zip(frame)
    -- Interleave the items of several separated lists
    --     1, 2, ...       -- lists
    --     sep             -- default separator pattern of all lists
    --     sep1, sep2, ... -- separator pattern of one particular list
    --     isep            -- inserted between items of the same index
    --     osep            -- inserted between groups of items
    -- Returns: string
    local lists = {}
    local seps = {}
    local defaultsep = frame.args["sep"] or ""
    local innersep = frame.args["isep"] or ""
    local outersep = frame.args["osep"] or ""

    -- Parse parameters
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then lists[knum] = v else
            if string.sub(k, 1, 3) == "sep" then
                local sepnum = tonumber(string.sub(k, 4))
                if sepnum then seps[sepnum] = v end
            end
        end
    end
    -- Use the default separator where no explicit one is given
    for i = 1, math.max(#seps, #lists) do
        if not seps[i] then seps[i] = defaultsep end
    end

    -- Split lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], seps[i])
        if #lists[i] > maxListLen then maxListLen = #lists[i] end
    end

    local result = ""
    for i = 1, maxListLen do
        if i ~= 1 then result = result .. outersep end
        for j = 1, #lists do
            if j ~= 1 then result = result .. innersep end
            result = result .. (lists[j][i] or "")
        end
    end
    return result
end



p.failsafe = function ( frame )
    -- Versioning interface
    --     frame  -- frame object with args[ 1 ], or plain string
    -- Returns: string; empty if Failsafe.failsafe() yields false
    local s = type( frame )
    local since
    if s == "table" then
        since = frame.args[ 1 ]
    elseif s == "string" then
        since = frame
    end
    if since then
        since = mw.text.trim( since )
        if since == "" then
            since = false
        end
    end
    return Failsafe.failsafe( since )  or  ""
end -- p.failsafe()



p.Text = function ()
    -- Retrieve the Lua API table
    return Text
end -- p.Text

-- Calling the export table itself removes this metatable
-- and delivers the Lua API table
setmetatable( p,  { __call = function ( func, ... )
                                 setmetatable( p, nil )
                                 return Failsafe
                             end } )

return p