Module:UtilsString

From Zelda Wiki, the Zelda encyclopedia
Jump to navigation Jump to search

This module provides utility functions for manipulating Lua strings in general. For string manipulation and formatting that is specific to wikitext, see Module:UtilsMarkup.

This module re-implements some of the functions in the mw.text library. Use these functions whenever possible — mw.text is an order of magnitude slower because it uses mw.ustring.

Like Module:UtilsTable, some functions have both procedural and functional variants.

This module exports the following functions.

endsWith

endsWith | _endsWith

endsWith(str, pattern)

Returns

  • true if str ends with pattern, else false.

Examples

#InputOutputResult
1
endsWith("Fooloo Limpah", "Limpah")
true
2
endsWith("Fooloo Limpah", "limpah")
false
3
endsWith("Fooloo Limpah", "")
true
4
endsWith("Wood (Character)", ")", true)
true

_endsWith(pattern)(str)

Returns

  • true if str ends with pattern, else false.

Examples

#InputOutputResult
5
_endsWith("Limpah")("Fooloo Limpah")
true
6
_endsWith("limpah")("Fooloo Limpah")
false
7
_endsWith("")("Fooloo Limpah")
true
8
_endsWith(")")("Wood (Character)")
true

endsWithRegex

endsWithRegex | _endsWithRegex

endsWithRegex(str, pattern)

Returns

  • true if str ends with regular expression pattern, else false.

Examples

#InputOutputResult
9
endsWithRegex("Wood (Character)", "%([^)]*%)")
true
10
endsWithRegex("Wood", "%([^)]*%)")
false

_endsWithRegex(pattern)(str)

Returns

  • true if str ends with regular expression pattern, else false.

Examples

#InputOutputResult
11
_endsWithRegex("%([^)]*%)")("Wood (Character)")
true
12
_endsWithRegex("%([^)]*%)")("Wood")
false

interpolate

interpolate(formatStr, args)

Approximation of string interpolation

Parameters

  • formatStr (string, required)
  • args (map of string keys to string values, required)

Returns

  • The formatted string.

Examples

#InputOutputStatus
13
interpolate(
  "${wiki} is a ${franchise} encyclopedia that anyone can edit.",
  {
    wiki = "Zelda Wiki",
    franchise = "''Zelda''",
  }
)
"Zelda Wiki is a ''Zelda'' encyclopedia that anyone can edit."

isEmpty

isEmpty(str)

Returns

  • true if and only if the value is nil or ""

Examples

#InputOutputResult
14
isEmpty(nil)
true
15
isEmpty("")
true
16
isEmpty(" ")
false

isBlank

isBlank(str)

Returns

  • true if and only if str is nil, blank, or whitespace.

Examples

#InputOutputResult
17
isBlank("  ")
true
18
isBlank("\n\n\n")
true
19
isBlank(nil)
true
20
isBlank("foo")
false

kebabCase

kebabCase(str)

Returns

  • The string converted to kebab-case

Examples

#InputOutputStatus
21
kebabCase("This is a string")
"this-is-a-string"

nilIfEmpty

nilIfEmpty(str)

Returns

  • nil if value is nil or empty string, otherwise returns the given value.

Examples

#InputOutputStatus
22
nilIfEmpty("")
nil
23
nilIfEmpty(nil)
nil
24
nilIfEmpty(" ")
" "

notBlank

notBlank(str)

Returns

  • true if and only if str does not contain only whitespace.

Examples

#InputOutputResult
25
notBlank("  ")
false
26
notBlank("\n\n\n")
false
27
notBlank(nil)
false
28
notBlank("foo")
true

notEmpty

notEmpty(str)

Returns

  • true if and only if str is neither nil nor an empty string.

Examples

#InputOutputResult
29
notEmpty(" ")
true
30
notEmpty("")
false
31
notEmpty(nil)
false

split

split | _split

split(str, [pattern], [plain])

A performant alternative to mw.text.split.

Parameters

  • str (string, required)
  • [pattern] (string, default "%s*,%s*")
  • [plain] (boolean)

Returns

  • A table of the split strings.

Examples

#InputOutputResult
32
split(" foo,    bar,baz ")
{" foo", "bar", "baz "}
33
split("foo bar baz", " ")
{"foo", "bar", "baz"}
Support for Unicode strings
34
split("アイウエオ", "")
{"ア", "イ", "ウ", "エ", "オ"}
35
split("グタンバチの祠, インイサの祠")
{"グタンバチの祠", "インイサの祠"}

_split([pattern], [plain])(str)

A performant alternative to mw.text.split.

Parameters

Returns

  • A table of the split strings.

Examples

#InputOutputResult
36
_split()(" foo,    bar,baz ")
{" foo", "bar", "baz "}
37
_split(" ")("foo bar baz")
{"foo", "bar", "baz"}
Support for Unicode strings
38
_split("")("アイウエオ")
{"ア", "イ", "ウ", "エ", "オ"}
39
_split()("グタンバチの祠, インイサの祠")
{"グタンバチの祠", "インイサの祠"}

startsWith

startsWith | _startsWith

startsWith(str, pattern)

Returns

  • true if str starts with pattern, else false.

Examples

#InputOutputResult
40
startsWith("Fooloo Limpah", "Foo")
true
41
startsWith("Fooloo Limpah", "foo")
false
42
startsWith("Fooloo Limpah", "")
true
43
startsWith("[[foo]]", "[[")
true

_startsWith(pattern)(str)

Returns

  • true if str starts with pattern, else false.

Examples

#InputOutputResult
44
_startsWith("Foo")("Fooloo Limpah")
true
45
_startsWith("foo")("Fooloo Limpah")
false
46
_startsWith("")("Fooloo Limpah")
true
47
_startsWith("[[")("[[foo]]")
true

startsWithRegex

startsWithRegex | _startsWithRegex

startsWithRegex(str, pattern)

Returns

  • true if str starts with regular expression pattern, else false.

Examples

#InputOutputResult
48
startsWithRegex("foo", "[af]")
true
49
startsWithRegex("aoo", "[af]")
true
50
startsWithRegex("boo", "[af]")
false

_startsWithRegex(pattern)(str)

Returns

  • true if str starts with regular expression pattern, else false.

Examples

#InputOutputResult
51
_startsWithRegex("[af]")("foo")
true
52
_startsWithRegex("[af]")("aoo")
true
53
_startsWithRegex("[af]")("boo")
false

stripTrailingParentheses

stripTrailingParentheses(str)

Returns

  • The string minus any text in trailing parentheses.

Examples

#InputOutputStatus
54
stripTrailingParentheses("Link's Awakening (Nintendo Switch)")
"Link's Awakening"
55
stripTrailingParentheses("foo (bar) baz")
"foo (bar) baz"

sub

sub | _sub

sub(str, startIndex, [endIndex])

Equivalent to string.sub.

Parameters

  • str (string, required)
  • startIndex (number, required)
  • [endIndex] (number, default #str)

Returns

  • A substring of str from startIndex to endIndex (inclusive).

Examples

#InputOutputStatus
56
sub("Fooloo Limpah", 8)
"Limpah"
57
sub("Fooloo Limpah", 1, 6)
"Fooloo"
58
sub("Fooloo Limpah", 20)
""
59
sub("Fooloo Limpah", -20)
"Fooloo Limpah"
60
sub("Fooloo Limpah", 8, 20)
"Limpah"

_sub(startIndex, [endIndex])(str)

Equivalent to string.sub.

Parameters

Returns

  • Function returning a substring of str from startIndex to endIndex (inclusive).

Examples

#InputOutputStatus
61
_sub(8)("Fooloo Limpah")
"Limpah"
62
_sub(1, 6)("Fooloo Limpah")
"Fooloo"
63
_sub(20)("Fooloo Limpah")
""
64
_sub(-20)("Fooloo Limpah")
"Fooloo Limpah"
65
_sub(8, 20)("Fooloo Limpah")
"Limpah"

trim

trim | _trim

trim(str, [pattern])

A performant alternative to mw.text.trim.

Parameters

  • str (string, required)
  • [pattern] (string)

Returns

  • The trimmed string.

Examples

#InputOutputStatus
66
trim("  foo")
"foo"
67
trim(":Category:Link", ":")
"Category:Link"
Unicode support
68
trim(" グタンバチの祠 ")
"グタンバチの祠"

_trim([pattern])(str)

A performant alternative to mw.text.trim.

Parameters

Returns

  • The trimmed string.

Examples

#InputOutputStatus
69
_trim()("  foo")
"foo"
70
_trim(":")(":Category:Link")
"Category:Link"
Unicode support
71
_trim()(" グタンバチの祠 ")
"グタンバチの祠"

-- Public module table: functions stored on `p` are exported (the module returns `p`).
local p = {}
-- Private helper table: shared implementations used by the public functions; not returned.
local h = {}

-- Suffix test with literal matching: delegates to h.endsWith with the
-- `plain` flag switched on, so `pattern` is not interpreted as a Lua pattern.
function p.endsWith(str, pattern)
	local usePlainMatching = true
	return h.endsWith(str, pattern, usePlainMatching)
end
-- Suffix test with pattern matching: delegates to h.endsWith with the
-- `plain` flag switched off, so `pattern` is interpreted as a Lua pattern.
function p.endsWithRegex(str, pattern)
	local usePlainMatching = false
	return h.endsWith(str, pattern, usePlainMatching)
end
-- Curried form of p.endsWith: captures `pattern` and returns a function
-- that takes `str` and runs the literal (plain) suffix test on it.
function p._endsWith(pattern)
	local function matcher(str)
		return h.endsWith(str, pattern, true)
	end
	return matcher
end
-- Curried form of p.endsWithRegex: captures `pattern` and returns a function
-- that takes `str` and runs the Lua-pattern suffix test on it.
function p._endsWithRegex(pattern)
	local function matcher(str)
		return h.endsWith(str, pattern, false)
	end
	return matcher
end
-- Shared implementation of the endsWith family.
--
-- @param str     The string to inspect.
-- @param pattern The suffix to look for (a number is converted to its string form,
--                as string.find would do).
-- @param plain   When truthy, `pattern` is literal text; otherwise it is a Lua pattern.
-- @return true if some match of `pattern` ends exactly at the end of `str`, else false.
--
-- The previous implementation searched forward and restarted after the END of each
-- match. That skipped overlapping matches (endsWith("aaa", "aa") returned false),
-- missed lazy patterns that could be extended to the end of the string, and could
-- loop forever on non-empty-positioned zero-length matches such as "%f[%w]".
function h.endsWith(str, pattern, plain)
	if type(pattern) == "number" then
		pattern = tostring(pattern)
	end

	if plain then
		-- Literal suffix: compare the last #pattern bytes directly.
		-- The index form (rather than str:sub(-n)) keeps the empty pattern correct,
		-- because str:sub(#str + 1) is "" whereas str:sub(-0) is the whole string.
		local strLength, suffixLength = #str, #pattern
		return suffixLength <= strLength
			and str:sub(strLength - suffixLength + 1) == pattern
	end

	-- Lua pattern: let the matcher itself require that the match ends at the end of
	-- the string by anchoring with "$". A "$" is only an anchor when it is the last
	-- character and is preceded by an even number of "%" escapes; in that case the
	-- pattern is already anchored and must not get a second "$" (which would turn
	-- the first one into a literal).
	local trailing = pattern:match("%%*%$$")
	local alreadyAnchored = trailing ~= nil and #trailing % 2 == 1
	local anchored = alreadyAnchored and pattern or (pattern .. "$")
	return str:find(anchored) ~= nil
end

-- By http://lua-users.org/wiki/RiciLake
-- Replaces each `${key}` placeholder in `formatStr` with `tab[key]`.
-- A placeholder whose key has no truthy entry in `tab` is left as written.
-- Only the resulting string is returned (gsub's match count is discarded).
function p.interpolate(formatStr, tab)
	local function substitute(placeholder)
		-- Drop the leading "${" and the trailing "}" to obtain the lookup key.
		local key = placeholder:sub(3, -2)
		return tab[key] or placeholder
	end
	local result = formatStr:gsub('($%b{})', substitute)
	return result
end

-- Returns true when `str` is nil or the empty string; false for every other value.
function p.isEmpty(str)
	if str == nil then
		return true
	end
	return str == ""
end

-- Returns true if `str` is nil, empty, or consists only of whitespace.
--
-- Besides the characters matched by Lua's %s class, the fullwidth (ideographic)
-- space U+3000 counts as whitespace. It is written as its UTF-8 byte sequence
-- (227 128 128) so the source does not depend on an invisible literal character.
--
-- Previously only a string that was EXACTLY one fullwidth space was recognised;
-- two fullwidth spaces, or a fullwidth space mixed with ordinary whitespace,
-- were reported as not blank.
--
-- @param str A string or nil.
-- @return boolean
function p.isBlank(str)
	if str == nil then
		return true
	end
	-- Remove every fullwidth space, then check that no non-%s byte remains.
	local withoutWideSpaces = str:gsub("\227\128\128", "")
	return withoutWideSpaces:find("%S") == nil
end

-- Converts `str` to kebab-case: a hyphen is inserted before each uppercase
-- letter, the text is lowercased, and spaces become hyphens.
--
-- A hyphen is NOT inserted before an uppercase letter that already follows a
-- space or hyphen (or starts the string). Without that rule "Hello World"
-- became "hello--world", because both the capital and the space produced a hyphen.
--
-- @param str The string to convert.
-- @return The kebab-cased string.
function p.kebabCase(str)
	-- Tentatively mark every uppercase letter as the start of a new word.
	local result = string.gsub(str, "(%u)", "-%1")
	-- A capital at the very start does not need a separator.
	result = string.gsub(result, "^%-", "")
	-- Nor does a capital that already has a space or hyphen before it.
	result = string.gsub(result, "([ %-])%-(%u)", "%1%2")
	result = string.lower(result)
	result = string.gsub(result, " ", "-")
	return result
end

-- Passes `str` through unchanged unless it is falsy or the empty string,
-- in which case nothing is returned (which callers observe as nil).
function p.nilIfEmpty(str)
	local isMissing = not str or str == ""
	if isMissing then
		-- Bare return on purpose: the function yields no values in this case.
		return
	end
	return str
end

-- Logical negation of p.isBlank.
function p.notBlank(str)
	if p.isBlank(str) then
		return false
	end
	return true
end

-- Logical negation of p.isEmpty.
function p.notEmpty(str)
	if p.isEmpty(str) then
		return false
	end
	return true
end

-- Curried form of p.split: captures `pattern` and `plain` and returns a
-- function that takes the string to split.
function p._split(pattern, plain)
	local function splitter(str)
		return p.split(str, pattern, plain)
	end
	return splitter
end
-- Splits `text` into a table of substrings.
--
-- @param text    The string to split.
-- @param pattern Separator; defaults to "%s*,%s*" (a comma with optional
--                surrounding whitespace). The empty string splits `text` into
--                its individual UTF-8 characters.
-- @param plain   When truthy, `pattern` is literal text rather than a Lua pattern.
-- @return A table (array) of the pieces, in order.
--
-- A separator pattern that can match the empty string (e.g. "%s*" or "x*")
-- used to leave the scan position unchanged and loop forever. Such zero-length
-- matches are now handled the way mw.text.split treats an empty separator: the
-- piece runs through the character at the match position and scanning resumes
-- after it, so every iteration makes progress.
function p.split(text, pattern, plain)
	pattern = pattern or "%s*,%s*"
	local ret = {}

	if pattern == "" then
		-- One lead byte followed by any continuation bytes = one UTF-8 character.
		for m in string.gmatch(text, "([%z\1-\127\194-\244][\128-\191]*)") do
			ret[#ret+1] = m
		end
		return ret
	end

	local len = #text
	local i = 1
	while true do
		local startIndex, endIndex = string.find(text, pattern, i, plain)
		if startIndex == nil then
			-- No further separator: the remainder is the last piece.
			ret[#ret+1] = string.sub(text, i)
			break
		end
		if endIndex < startIndex then
			-- Zero-length match. Consume one whole UTF-8 character at the match
			-- position (lead byte plus continuation bytes 128-191) to move forward.
			local charEnd = startIndex
			while charEnd < len do
				local nextByte = string.byte(text, charEnd + 1)
				if nextByte < 128 or nextByte > 191 then
					break
				end
				charEnd = charEnd + 1
			end
			ret[#ret+1] = string.sub(text, i, charEnd)
			if charEnd >= len then
				break
			end
			i = charEnd + 1
		else
			ret[#ret+1] = string.sub(text, i, startIndex - 1)
			i = endIndex + 1
		end
	end
	return ret
end

-- Prefix test with literal matching: delegates to h.startsWith with the
-- `plain` flag switched on, so `pattern` is not interpreted as a Lua pattern.
function p.startsWith(str, pattern)
	local usePlainMatching = true
	return h.startsWith(str, pattern, usePlainMatching)
end
-- Prefix test with pattern matching: delegates to h.startsWith with the
-- `plain` flag switched off, so `pattern` is interpreted as a Lua pattern.
function p.startsWithRegex(str, pattern)
	local usePlainMatching = false
	return h.startsWith(str, pattern, usePlainMatching)
end
-- Curried form of p.startsWith: captures `pattern` and returns a function
-- that takes `str` and runs the literal (plain) prefix test on it.
function p._startsWith(pattern)
	local function matcher(str)
		return h.startsWith(str, pattern, true)
	end
	return matcher
end
-- Curried form of p.startsWithRegex: captures `pattern` and returns a function
-- that takes `str` and runs the Lua-pattern prefix test on it.
function p._startsWithRegex(pattern)
	local function matcher(str)
		return h.startsWith(str, pattern, false)
	end
	return matcher
end
-- Shared implementation of the startsWith family.
-- Searches `str` for `pattern` from position 1 (literally when `plain` is
-- truthy) and reports whether the first match found begins at position 1.
function h.startsWith(str, pattern, plain)
	local firstMatchStart = str:find(pattern, 1, plain)
	if firstMatchStart == 1 then
		return true
	end
	return false
end

-- Removes a trailing parenthetical — a space followed by a balanced "(...)"
-- group that ends the string — and returns the rest. Strings without such a
-- trailing group are returned unchanged.
--
-- The previous pattern used a greedy ".*" starting at the FIRST " (" in the
-- string, so an earlier parenthetical was stripped together with everything
-- after it ("foo (bar) baz (qux)" became "foo"). "%b()" matches one balanced
-- group (nested parentheses included) and "$" pins it to the end of the string,
-- which also replaces the former end-marker concatenation.
--
-- @param str The string to process.
-- @return `str` without its trailing parenthetical, if it had one.
function p.stripTrailingParentheses(str)
	local index = string.find(str, " %b()$")
	if index ~= nil then
		str = string.sub(str, 1, index - 1)
	end
	return str
end

-- Procedural form of p._sub: builds the slicing function for indices `s`
-- and `e`, then applies it to `str`.
function p.sub(str, s, e)
	local slicer = p._sub(s, e)
	return slicer(str)
end
-- Curried string.sub: captures start index `s` and optional end index `e`
-- and returns a function that slices whatever string it is given.
function p._sub(s, e)
	local function slice(str)
		return string.sub(str, s, e)
	end
	return slice
end

-- Source: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua
-- Procedural form of p._trim: builds the trimming function for `charset`
-- and applies it to `s`.
function p.trim(s, charset)
	local trimmer = p._trim(charset)
	return trimmer(s)
end
-- Returns a function that strips leading and trailing characters belonging to
-- `charset` from a string.
--
-- @param charset Contents of a Lua pattern character class (it is placed
--                between "[" and "]" unescaped). Defaults to '\t\r\n\f%s'.
-- @return function(s) -> the trimmed string (only the string; gsub's match
--         count is dropped).
--
-- The pattern depends only on `charset`, so it is built once here instead of
-- on every call of the returned function; this matters when the result is
-- reused across many strings.
function p._trim(charset)
	charset = charset or '\t\r\n\f%s'
	local edge = '[' .. charset .. ']*'
	local trimPattern = '^' .. edge .. '(.-)' .. edge .. '$'
	return function(s)
		local trimmed = s:gsub( trimPattern, '%1' )
		return trimmed
	end
end

-- Returns a table of parameter schemas keyed by function name. Each schema maps
-- a parameter name to a description table with fields such as `type`,
-- `required`, and `default`.
-- NOTE(review): presumably consumed by the wiki's documentation/validation
-- tooling — the consumer is not visible in this file; confirm.
function p.Schemas()
	return {
		split = {
			str = {
				type = "string",
				required = true,
			},
			pattern = {
				type = "string",
				-- Same separator that p.split falls back to when `pattern` is omitted.
				default = mw.dumpObject("%s*,%s*"),
			},
			plain = {
				type = "boolean",
			},
		},
		sub = {
			str = {
				type = "string",
				required = true,
			},
			startIndex = {
				type = "number",
				required = true,
			},
			endIndex = {
				type = "number",
				-- Descriptive text, not an evaluated expression.
				default = "#str",
			},
		},
		trim = {
			pattern = {
				type = "string",
			},
			str = {
				type = "string",
				required = true,
			},
		},
		-- NOTE(review): no `format` function is defined in the visible part of
		-- this module — confirm whether this schema entry is stale.
		format = {
			formatStr = {
				type = "string",
				required = true,
			},
			["..."] = {
				type = "array",
				items = {
					type = "string",
				},
				required = true,
			},
		},
		interpolate = {
			formatStr = {
				type = "string",
				required = true,
			},
			args = {
				type = "map",
				required = true,
				keys = { type = "string" },
				values = { type = "string" },
			},
		},
	}
end

-- Returns the documentation data for this module, keyed by function name.
-- Each entry may contain:
--   desc    - wikitext description,
--   params  - parameter names of the procedural form,
--   _params - parameter groups of the curried "_name" form (outer call, inner call),
--   returns - wikitext description of the return value,
--   cases   - example calls, each with `args` and the expected result `expect`
--             (plus an optional `desc`); `outputOnly = true` is a flag on the list.
-- NOTE(review): presumably rendered into the module's documentation page and run
-- as examples by tooling outside this file — confirm.
function p.Documentation()
	return {
		isEmpty = {
			params = {"str"},
			returns = '<code>true</code> if and only if the value is <code>nil</code> or <code>""</code>',
			cases = {
				{
					args = {nil},
					expect = true,
				},
				{
					args = {""},
					expect = true,
				},
				{
					args = {" "},
					expect = false,
				},
			},
		},
		isBlank = {
			params = {"str"},
			returns = "<code>true</code> if and only if <code>str</code> is nil, blank, or whitespace.",
			cases = {
				{
					args = {"  "},
					expect = true,
				},
				{
					args = {"\n\n\n"},
					expect = true,
				},
				{
					args = {nil},
					expect = true,
				},
				{
					args = {"foo"},
					expect = false,
				},
			},
		},
		kebabCase = {
			params = {"str"},
			returns = "The string converted to kebab-case",
			cases = {
				outputOnly = true,
				{
					args = {"This is a string"},
					expect = "this-is-a-string",
				},
			},
		},
		notBlank = {
			params = {"str"},
			returns = "<code>true</code> if and only if <code>str</code> does not contain only whitespace.",
			cases = {
				{
					args = {"  "},
					expect = false,
				},
				{
					args = {"\n\n\n"},
					expect = false,
				},
				{
					args = {nil},
					expect = false,
				},
				{
					args = {"foo"},
					expect = true,
				},
			},
		},
		notEmpty = {
			params = {"str"},
			returns = "<code>true</code> if and only if <code>str</code> is neither nil nor an empty string.",
			cases = {
				{
					args = {" "},
					expect = true,
				},
				{
					args = {""},
					expect = false,
				},
				{
					args = {nil},
					expect = false,
				},
			}
		},
		nilIfEmpty = {
			params = {"str"},
			returns = "<code>nil</code> if value is nil or empty string, otherwise returns the given value.",
			cases = {
				outputOnly = true,
				{
					args = {""},
					-- Assigning nil in a table constructor leaves the key absent.
					expect = nil,	
				},
				{
					args = {nil},
					expect = nil,
				},
				{
					args = {" "},
					expect = " ",
				},
			},
		},
		startsWith = {
			params = {"str", "pattern"},
			_params = {{"pattern"}, {"str"}},
			returns = "<code>true</code> if <code>str</code> starts with <code>pattern</code>, else <code>false</code>.",
			cases = {
				{
					args = {"Fooloo Limpah", "Foo"},
					expect = true,
				},
				{
					args = {"Fooloo Limpah", "foo"},
					expect = false,
				},
				{
					args = {"Fooloo Limpah", ""},
					expect = true,
				},
				{
					-- "[[" would be special in a Lua pattern; this case exercises literal matching.
					args = {"[[foo]]", "[["},
					expect = true,
				},
			},
		},
		startsWithRegex = {
			params = {"str", "pattern"},
			_params = {{"pattern"}, {"str"}},
			returns = "<code>true</code> if <code>str</code> starts with regular expression <code>pattern</code>, else <code>false</code>.",
			cases = {
				{
					args = {"foo", "[af]"},
					expect = true,
				},
				{
					args = {"aoo", "[af]"},
					expect = true,
				},
				{
					args = {"boo", "[af]"},
					expect = false,
				},
			}
		},
		endsWith = {
			params = {"str", "pattern"},
			_params = {{"pattern"}, {"str"}},
			returns = "<code>true</code> if <code>str</code> ends with <code>pattern</code>, else <code>false</code>.",
			cases = {
				{
					args = {"Fooloo Limpah", "Limpah"},
					expect = true,
				},
				{
					args = {"Fooloo Limpah", "limpah"},
					expect = false,
				},
				{
					args = {"Fooloo Limpah", ""},
					expect = true,
				},
				{
					-- NOTE(review): p.endsWith declares only (str, pattern), so the
					-- third argument here is ignored by the function itself.
					args = {"Wood (Character)", ")", true},
					expect = true,
				}
			},
		},
		endsWithRegex = {
			params = {"str", "pattern"},
			_params = {{"pattern"}, {"str"}},
			returns = "<code>true</code> if <code>str</code> ends with regular expression <code>pattern</code>, else <code>false</code>.",
			cases = {
				{
					args = {"Wood (Character)", "%([^)]*%)"},
					expect = true,
				},
				{
					args = {"Wood", "%([^)]*%)"},
					expect = false,
				},
			},
		},
		trim = {
			desc = "A [[gphelp:Extension:Scribunto#mw.text.trim is slow|performant alternative]] to {{Scribunto Manual|lib=mw.text.trim}}.",
			params = {"str", "pattern"},
			_params = {{"pattern"}, {"str"}},
			returns = "The trimmed string.",
			cases = {
				outputOnly = true,
				{
					args = {"  foo"},
					expect = "foo",
				},
				{
					args = {":Category:Link", ":"},
					expect = "Category:Link",
				},
				{
					desc = "Unicode support",
					args = {" グタンバチの祠 "},
					expect = "グタンバチの祠",
				},
			},
		},
		split = {
			desc = "A [[gphelp:Extension:Scribunto#mw.text.split is very slow|performant alternative]] to  {{Scribunto Manual|lib=mw.text.split}}.",
			params = {"str", "pattern", "plain"},
			_params = {{"pattern", "plain"}, {"str"}},
			returns = "A <code>table</code> of the split strings.",
			cases = {
				{
					-- No pattern given: exercises the default separator.
					args = {" foo,    bar,baz "},
					expect = {" foo", "bar", "baz "},
				},
				{
					args = {"foo bar baz", " "},
					expect = {"foo", "bar", "baz"},
				},
				{
					desc = "Support for Unicode strings",
					-- Empty pattern: split into individual characters.
					args = {"アイウエオ", ""},
					expect = {"ア","イ","ウ","エ","オ"},
				},
				{
					args = {"グタンバチの祠, インイサの祠"},
					expect = {"グタンバチの祠", "インイサの祠"},
				}
			},
		},
		stripTrailingParentheses = {
			params = {"str"},
			returns = "The string minus any text in trailing parentheses.",
			cases = {
				outputOnly = true,
				{
					args = {"Link's Awakening (Nintendo Switch)"},
					expect = "Link's Awakening",
				},
				{
					-- Parentheses that are not at the end are left alone.
					args = {"foo (bar) baz"},
					expect = "foo (bar) baz",
				},
			},
		},
		sub = {
			desc = "Equivalent to <code>string.sub</code>.",
			params = {"str", "startIndex", "endIndex"},
			_params = {{"startIndex", "endIndex"}, {"str"}},
			returns = "Function returning a substring of <code>str</code> from <code>startIndex</code> to <code>endIndex</code> (inclusive).",
			cases = {
				outputOnly = true,
				{
					args = {"Fooloo Limpah", 8},
					expect = "Limpah",
				},
				{
					args = {"Fooloo Limpah", 1, 6},
					expect = "Fooloo",
				},
				{
					-- Start index past the end of the string.
					args = {"Fooloo Limpah", 20},
					expect = "",
				},
				{
					-- Negative start index reaching before the beginning of the string.
					args = {"Fooloo Limpah", -20},
					expect = "Fooloo Limpah",
				},
				{
					-- End index past the end of the string.
					args = {"Fooloo Limpah", 8, 20},
					expect = "Limpah", 
				},
			},
		},
		interpolate = {
			desc = "Approximation of [http://lua-users.org/wiki/StringInterpolation string interpolation]",
			params = {"formatStr", "args"},
			returns = "The formatted string.",
			cases = {
				outputOnly = true,
				{
					args = {"${wiki} is a ${franchise} encyclopedia that anyone can edit.", {
						wiki = "Zelda Wiki",
						franchise = "''Zelda''",
					}},
					expect = "Zelda Wiki is a ''Zelda'' encyclopedia that anyone can edit."
				}
			}
		},
	}
end

return p