-- Module:UtilsString

-- p: table of public functions; it is what this module returns.
-- h: table of private helper functions shared by the public ones.
local p, h = {}, {}

--- Tests whether `str` ends with the literal text `pattern`.
-- @param str string to inspect
-- @param pattern literal suffix (no Lua-pattern magic)
-- @return boolean
function p.endsWith(str, pattern)
	return h.endsWith(str, pattern, true)
end

--- Tests whether some match of the Lua pattern `pattern` ends exactly at the end of `str`.
-- @param str string to inspect
-- @param pattern Lua pattern
-- @return boolean
function p.endsWithRegex(str, pattern)
	return h.endsWith(str, pattern, false)
end

--- Curried form of p.endsWith: fixes `pattern` and returns a predicate over `str`.
function p._endsWith(pattern)
	return function(str)
		return h.endsWith(str, pattern, true)
	end
end

--- Curried form of p.endsWithRegex: fixes `pattern` and returns a predicate over `str`.
function p._endsWithRegex(pattern)
	return function(str)
		return h.endsWith(str, pattern, false)
	end
end

--- Shared implementation of the endsWith family.
-- The previous version resumed each search after the END of the last match,
-- which skipped overlapping matches (endsWith("aaa", "aa") was false) and could
-- loop forever on patterns that match the empty string at an interior position.
-- @param str string to inspect
-- @param pattern literal text or Lua pattern, depending on `plain`
-- @param plain true for a literal comparison, false for a Lua pattern
-- @return boolean
function h.endsWith(str, pattern, plain)
	if plain then
		-- string.len raises on nil (as str:find did) and coerces numbers.
		local len = string.len(pattern)
		-- The empty-suffix guard is needed because str:sub(-0) is the whole string.
		return len == 0 or str:sub(-len) == tostring(pattern)
	end

	-- A leading "^" anchors the match at `init`, so anchored patterns keep the
	-- original stepping (resume right after the previous match).
	local anchored = string.sub(pattern, 1, 1) == "^"
	local init = 1
	while true do
		local startIndex, endIndex = str:find(pattern, init)
		if startIndex == nil then
			return false
		end
		-- endIndex == 0 is an empty match at position 1: the empty string is a suffix.
		if endIndex == #str or endIndex == 0 then
			return true
		end
		if anchored and endIndex >= startIndex then
			init = endIndex + 1
		else
			-- Step by one from the match START so overlapping matches are seen
			-- and the scan always advances.
			init = startIndex + 1
		end
	end
end

-- By http://lua-users.org/wiki/RiciLake
--- Replaces every `${name}` placeholder in `formatStr` with `tab[name]`.
-- Placeholders whose key has no truthy value in `tab` are left untouched.
-- The attribution comment used to share a line with the definition, which
-- commented the whole function out; it now sits on its own line.
-- @param formatStr string containing `${key}` placeholders
-- @param tab table mapping placeholder names to replacement values
-- @return the interpolated string (only the string, not gsub's match count)
function p.interpolate(formatStr, tab)
	-- The outer parentheses truncate gsub's two results to the string alone.
	return (formatStr:gsub('($%b{})', function(w)
		-- w is "${key}"; sub(3, -2) strips the leading "${" and trailing "}".
		return tab[w:sub(3, -2)] or w
	end))
end

--- True when `str` is nil or the empty string; false for anything else.
function p.isEmpty(str)
	if str == nil then
		return true
	end
	return str == ""
end

--- True when `str` is nil or trims (via p.trim) down to the empty string.
function p.isBlank(str)
	if str == nil then
		return true
	end
	local trimmed = p.trim(str)
	return trimmed == ""
end

--- Converts a string to kebab-case.
-- Examples: "This is a string" -> "this-is-a-string", "fooBar" -> "foo-bar",
-- "Foo Bar" -> "foo-bar" (previously "foo--bar").
-- @param str string to convert
-- @return the lowercased, hyphen-separated string
function p.kebabCase(str)
	-- Mark every word boundary implied by an uppercase letter.
	str = string.gsub(str, "(%u)", "-%1")
	-- A space already separates the words; drop the hyphen just inserted after
	-- it, otherwise the space-to-hyphen step below produces a double hyphen.
	str = string.gsub(str, " %-(%u)", " %1")
	-- Drop a hyphen at the very start (e.g. the one inserted before a leading capital).
	str = string.gsub(str, "^%-", "")
	str = string.lower(str)
	str = string.gsub(str, " ", "-")
	return str
end

--- Passes `str` through unless it is falsy or the empty string,
-- in which case nothing is returned.
function p.nilIfEmpty(str)
	if not str or str == "" then
		return
	end
	return str
end

--- Logical negation of p.isBlank.
function p.notBlank(str)
	local blank = p.isBlank(str)
	return not blank
end

--- Logical negation of p.isEmpty.
function p.notEmpty(str)
	local empty = p.isEmpty(str)
	return not empty
end

--- Curried form of p.split: fixes `pattern` and `plain`, returns a function of `str`.
function p._split(pattern, plain)
	return function(str)
		return p.split(str, pattern, plain)
	end
end

-- Original source: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua
--- Splits `text` on `pattern` and returns the pieces as an array.
-- The source comment used to share a line with this definition and with
-- h.gsplit, which commented both out; they are restored here.
-- @param text string to split
-- @param pattern separator; defaults to '%s*,%s*' when nil (see h.gsplit)
-- @param plain when true, `pattern` is treated as literal text
-- @return array of substrings
function p.split(text, pattern, plain)
	local ret = {}
	-- Edge case: gsplit with pattern "" breaks Unicode characters which are composed of multiple bytes.
	-- Instead we simply iterate over the characters, which is effectively what splitting with "" does.
	-- If we had Lua 5.3 we could probably do something with the utf8 library.
	-- Instead we use a snippet from http://lua-users.org/wiki/LuaUnicode
	if pattern == "" then
		for m in string.gmatch(text, "([%z\1-\127\194-\244][\128-\191]*)") do
			ret[#ret+1] = m
		end
	else
		for m in h.gsplit(text, pattern, plain) do
			ret[#ret+1] = m
		end
	end
	return ret
end

--- Iterator factory behind p.split: each call of the returned function yields
-- the next piece of `text`, then nil once the text is exhausted.
-- @param text string to split
-- @param pattern separator; defaults to '%s*,%s*' when nil or false
-- @param plain when true, `pattern` is treated as literal text
function h.gsplit(text, pattern, plain)
	if not pattern then
		pattern = '%s*,%s*'
	end
	-- s: start of the next piece (nil once finished); l: byte length of text.
	local s, l = 1, text:len()
	return function()
		if s then
			local e, n = text:find(pattern, s, plain)
			local ret
			if not e then
				-- No further separator: the remainder is the last piece.
				ret = text:sub(s)
				s = nil
			elseif n < e then
				-- Empty separator!
				ret = text:sub(s, e)
				if e < l then
					s = e + 1
				else
					s = nil
				end
			else
				ret = e > s and text:sub(s, e - 1) or ''
				s = n + 1
			end
			return ret
		end
	end, nil, nil
end

--- Shared implementation: true when the first match of `pattern` in `str`
-- begins at index 1. `plain` selects literal (true) or Lua-pattern (false) matching.
function h.startsWith(str, pattern, plain)
	local firstMatchStart = str:find(pattern, 1, plain)
	return firstMatchStart == 1
end

--- True when `str` begins with the literal text `pattern`.
function p.startsWith(str, pattern)
	local literal = true
	return h.startsWith(str, pattern, literal)
end

--- True when a match of the Lua pattern `pattern` begins at the start of `str`.
function p.startsWithRegex(str, pattern)
	local literal = false
	return h.startsWith(str, pattern, literal)
end

--- Curried p.startsWith: fixes `pattern`, returns a predicate over `str`.
function p._startsWith(pattern)
	local function predicate(str)
		return h.startsWith(str, pattern, true)
	end
	return predicate
end

--- Curried p.startsWithRegex: fixes `pattern`, returns a predicate over `str`.
function p._startsWithRegex(pattern)
	local function predicate(str)
		return h.startsWith(str, pattern, false)
	end
	return predicate
end

--- Removes a trailing " (...)" group from `str`.
-- Examples: "Link's Awakening (Nintendo Switch)" -> "Link's Awakening";
-- "foo (bar) baz" is returned unchanged because the parentheses are not trailing.
-- The match is greedy from the first " (" whose group reaches the end of the string.
-- @param str string to strip
-- @return the string without its trailing parenthesised part
function p.stripTrailingParentheses(str)
	-- Anchor with "$" instead of appending a "${endMarker}" sentinel: the
	-- sentinel could also occur inside `str` and make non-trailing parentheses match.
	local index = string.find(str, " %(.*%)$")
	if index ~= nil then
		str = string.sub(str, 1, index - 1)
	end
	return str
end

--- Substring of `str` from index `s` to index `e`, as computed by string.sub.
function p.sub(str, s, e)
	local extract = p._sub(s, e)
	return extract(str)
end

--- Curried p.sub: fixes the index range and returns a function of `str`.
function p._sub(s, e)
	local function extract(str)
		return string.sub(str, s, e)
	end
	return extract
end

-- Source: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua
--- Trims `s` at both ends.
-- The source comment used to share a line with these definitions, which
-- commented them out; they are restored here.
-- @param s string to trim
-- @param charset optional character-class body (e.g. ":") listing the
--   characters to strip; defaults to whitespace plus the fullwidth space
-- @return the trimmed string
function p.trim(s, charset)
	return p._trim(charset)(s)
end

--- Curried p.trim: fixes `charset` and returns a function of `s`.
function p._trim(charset)
	if charset then
		-- Caller-supplied set: used verbatim inside a byte-wise character class,
		-- so it should contain single-byte characters only.
		local pattern = '^[' .. charset .. ']*(.-)[' .. charset .. ']*$'
		return function(s)
			s = s:gsub(pattern, '%1')
			return s
		end
	end

	-- Default set: ASCII whitespace plus the fullwidth space (U+3000).
	-- string.gsub works on bytes, so the 3-byte fullwidth space must not go
	-- inside [...]: its bytes \227 and \128 would each be stripped on their own
	-- and corrupt neighbouring UTF-8 characters. It is removed as a whole
	-- sequence instead.
	local asciiPattern = '^[\t\r\n\f%s]*(.-)[\t\r\n\f%s]*$'
	local fullwidthSpace = '\227\128\128'
	return function(s)
		local previousLength
		-- Repeat until nothing changes so mixed runs of both kinds are removed.
		repeat
			previousLength = #s
			s = s:gsub(asciiPattern, '%1')
			while s:sub(1, 3) == fullwidthSpace do
				s = s:sub(4)
			end
			while s:sub(-3) == fullwidthSpace do
				s = s:sub(1, -4)
			end
		until #s == previousLength
		return s
	end
end

--- Returns the parameter schemas for this module's functions, keyed by
-- function name and then by parameter name.
-- The definition was missing the "()" parameter list, which is a syntax error.
-- Relies on the global `mw` (mw.dumpObject) supplied by the host environment.
-- NOTE(review): a `format` schema is listed although no p.format is visible
-- in this file — confirm whether it is still needed.
function p.Schemas()
	return {
		split = {
			str = {
				type = "string",
				required = true,
			},
			pattern = {
				type = "string",
				default = mw.dumpObject("%s*,%s*"),
			},
			plain = {
				type = "boolean",
			},
		},
		sub = {
			str = {
				type = "string",
				required = true,
			},
			startIndex = {
				type = "number",
				required = true,
			},
			endIndex = {
				type = "number",
				default = "#str",
			},
		},
		trim = {
			pattern = {
				type = "string",
			},
			str = {
				type = "string",
				required = true,
			},
		},
		format = {
			formatStr = {
				type = "string",
				required = true,
			},
			["..."] = {
				type = "array",
				items = { type = "string", },
				required = true,
			},
		},
		interpolate = {
			formatStr = {
				type = "string",
				required = true,
			},
			args = {
				type = "map",
				required = true,
				keys = { type = "string" },
				values = { type = "string" },
			},
		},
	}
end

--- Returns the documentation table for this module: for each function, its
-- parameter names, a description of the return value, and example cases
-- (`args` with the expected result in `expect`).
-- Repairs made here: the missing "()" parameter list was added, and two string
-- literals that had been broken across lines were rejoined.
function p.Documentation()
	return {
		isEmpty = {
			params = {"str"},
			returns = ' if and only if the value is   or  ',
			cases = {
				{
					args = {nil},
					expect = true,
				},
				{
					args = {""},
					expect = true,
				},
				{
					args = {" "},
					expect = false,
				},
			},
		},
		isBlank = {
			params = {"str"},
			returns = " if and only if   is nil, blank, or whitespace.",
			cases = {
				{
					args = {" "},
					expect = true,
				},
				{
					args = {"\n\n\n"},
					expect = true,
				},
				{
					args = {nil},
					expect = true,
				},
				{
					args = {"foo"},
					expect = false,
				},
			},
		},
		kebabCase = {
			params = {"str"},
			returns = "The string converted to kebab-case",
			cases = {
				outputOnly = true,
				{
					args = {"This is a string"},
					expect = "this-is-a-string",
				},
			},
		},
		notBlank = {
			params = {"str"},
			returns = " if and only if   does not contain only whitespace.",
			cases = {
				{
					args = {" "},
					expect = false,
				},
				{
					args = {"\n\n\n"},
					expect = false,
				},
				{
					args = {nil},
					expect = false,
				},
				{
					args = {"foo"},
					expect = true,
				},
			},
		},
		notEmpty = {
			params = {"str"},
			returns = " if and only if   is neither nil nor an empty string.",
			cases = {
				{
					args = {" "},
					expect = true,
				},
				{
					args = {""},
					expect = false,
				},
				{
					args = {nil},
					expect = false,
				},
			}
		},
		nilIfEmpty = {
			params = {"str"},
			returns = " if value is nil or empty string, otherwise returns the given value.",
			cases = {
				outputOnly = true,
				{
					args = {""},
					expect = nil,
				},
				{
					args = {nil},
					expect = nil,
				},
				{
					args = {" "},
					expect = " ",
				},
			},
		},
		startsWith = {
			params = {"str", "pattern"},
			_params = {{"pattern"}, {"str"}},
			returns = " if   starts with , else  .",
			cases = {
				{
					args = {"Fooloo Limpah", "Foo"},
					expect = true,
				},
				{
					args = {"Fooloo Limpah", "foo"},
					expect = false,
				},
				{
					args = {"Fooloo Limpah", ""},
					expect = true,
				},
				{
					args = {"foo", ""},
					expect = true,
				},
			},
		},
		startsWithRegex = {
			params = {"str", "pattern"},
			_params = {{"pattern"}, {"str"}},
			returns = " if   starts with regular expression , else  .",
		},
		-- NOTE(review): the text between the startsWithRegex entry above and the
		-- entry below was lost from this file; whatever lived there must be
		-- restored from the page history. The `trim` key and the leading "A " of
		-- its description are inferred from the surviving params/returns/cases
		-- and from the wording of the split entry — confirm.
		trim = {
			desc = "A performant alternative to .",
			params = {"str", "pattern"},
			_params = {{"pattern"}, {"str"}},
			returns = "The trimmed string.",
			cases = {
				outputOnly = true,
				{
					args = {"  foo"},
					expect = "foo",
				},
				{
					args = {":Category:Link", ":"},
					expect = "Category:Link",
				},
			},
		},
		split = {
			desc = "A performant alternative to  .",
			params = {"str", "pattern", "plain"},
			_params = {{"pattern", "plain"}, {"str"}},
			returns = "A   of the split strings.",
			cases = {
				{
					args = {" foo,    bar,baz "},
					expect = {" foo", "bar", "baz "},
				},
				{
					args = {"foo bar baz", " "},
					expect = {"foo", "bar", "baz"},
				},
				{
					desc = "Limited support for Unicode strings",
					args = {"アイウエオ", ""},
					expect = {"ア","イ","ウ","エ","オ"},
				},
			},
		},
		stripTrailingParentheses = {
			params = {"str"},
			returns = "The string minus any text in trailing parentheses.",
			cases = {
				outputOnly = true,
				{
					args = {"Link's Awakening (Nintendo Switch)"},
					expect = "Link's Awakening",
				},
				{
					args = {"foo (bar) baz"},
					expect = "foo (bar) baz",
				},
			},
		},
		sub = {
			desc = "Equivalent to  .",
			params = {"str", "startIndex", "endIndex"},
			_params = {{"startIndex", "endIndex"}, {"str"}},
			returns = "Function returning a substring of   from   to   (inclusive).",
			cases = {
				outputOnly = true,
				{
					args = {"Fooloo Limpah", 8},
					expect = "Limpah",
				},
				{
					args = {"Fooloo Limpah", 1, 6},
					expect = "Fooloo",
				},
				{
					args = {"Fooloo Limpah", 20},
					expect = "",
				},
				{
					args = {"Fooloo Limpah", -20},
					expect = "Fooloo Limpah",
				},
				{
					args = {"Fooloo Limpah", 8, 20},
					expect = "Limpah",
				},
			},
		},
		interpolate = {
			desc = "Approximation of string interpolation",
			params = {"formatStr", "args"},
			returns = "The formatted string.",
			cases = {
				outputOnly = true,
				{
					args = {"${wiki} is a ${franchise} encyclopedia that anyone can edit.", {
						wiki = "Zelda Wiki",
						franchise = "Zelda",
					}},
					expect = "Zelda Wiki is a Zelda encyclopedia that anyone can edit."
				}
			}
		},
	}
end

-- Export the table of public functions as this module's value.
return p