-- Module:Unicode data/patterns
-- Documentation for this module may be created at Module:Unicode data/patterns/doc
local export = {}
local Array = require "Module:array"
-- Formats a code point as a hexadecimal HTML numeric character reference,
-- zero-padded to at least four uppercase hex digits (e.g. 0x41 -> "&#x0041;").
local function numeric_character_reference(code_point)
	return string.format("&#x%04X;", code_point)
end
-- Groups every entry of a data module by its value.
--
-- data_module.singles maps code point -> value; each such entry becomes the
-- one-element range { code_point, code_point }.
-- data_module.ranges is a sequence of { low, high, value } triples; each
-- becomes { low, high }.
--
-- Returns a table mapping value -> Array of { low, high } pairs. The returned
-- table keeps its metatable, so indexing it with a value that has not been
-- seen creates, stores and returns a new empty Array for that key.
function export.all_ranges_per_value(data_module)
	local auto_create = {}
	function auto_create.__index(tbl, value)
		local list = Array()
		tbl[value] = list
		return list
	end
	local by_value = setmetatable({}, auto_create)

	for cp, value in pairs(data_module.singles) do
		local list = by_value[value]
		list:insert { cp, cp }
	end

	for _, entry in ipairs(data_module.ranges) do
		local first, last, value = unpack(entry)
		local list = by_value[value]
		list:insert { first, last }
	end

	return by_value
end
-- Collects the ranges of a data module whose value equals value_to_find
-- (compared with ==, so the types must match as well).
--
-- Entries from data_module.singles come first, as { code_point, code_point },
-- in whatever order pairs() yields them; entries from data_module.ranges
-- follow in sequence order as { low, high }.
--
-- Returns an Array of { low, high } pairs; it is not sorted.
function export.ranges_per_value(data_module, value_to_find)
	local matches = Array()

	-- Appends { low, high } when the entry's value is the one being searched for.
	local function collect(low, high, value)
		if value == value_to_find then
			matches:insert { low, high }
		end
	end

	for cp, value in pairs(data_module.singles) do
		collect(cp, cp, value)
	end
	for _, entry in ipairs(data_module.ranges) do
		collect(unpack(entry))
	end

	return matches
end
-- Sorts a list of { low, high } ranges in place, ascending by the lower bound.
local function sort_ranges(ranges)
	local function by_lower_bound(left, right)
		return left[1] < right[1]
	end
	table.sort(ranges, by_lower_bound)
end
-- Builds the body of a character set, i.e. the text that goes inside [...]
-- or [^...] in a Lua pattern or regular expression.
--
-- ranges:   sequence of { low, high } code point pairs, emitted in order.
-- char_ref: when truthy, each code point is written as an HTML numeric
--           character reference; otherwise as the literal character.
--
-- A range whose bounds are equal is written as a single character; any other
-- range is written as "low-high".
-- NOTE(review): characters that are special inside a set (such as "]", "^",
-- "-" or "%") are emitted without escaping -- confirm callers never need them.
local function make_pattern(ranges, char_ref)
	-- Renders one code point in the representation selected by char_ref.
	local function render(code_point)
		if char_ref then
			return numeric_character_reference(code_point)
		end
		return mw.ustring.char(code_point)
	end

	local pieces = Array()
	for _, bounds in ipairs(ranges) do
		pieces:insert(render(bounds[1]))
		if bounds[1] ~= bounds[2] then
			pieces:insert "-"
			pieces:insert(render(bounds[2]))
		end
	end
	return pieces:concat()
end
-- Strips the code points U+0000-U+001F from a list of { low, high } ranges,
-- modifying the list in place.
-- Treats all characters U+0000-U+001F as invalid in wikitext, but only some are.
--
-- A range lying entirely inside U+0000-U+001F is removed; a range that starts
-- inside it but ends above it has its lower bound raised to U+0020.
-- Every range is examined, so the list does not need to be sorted and may
-- contain any number of offending ranges.
local function sanitize_ranges(ranges)
	-- Walk backwards so that table.remove never shifts an entry that has not
	-- been visited yet.
	for i = #ranges, 1, -1 do
		local range = ranges[i]
		if 0 <= range[1] and range[1] <= 0x1F then
			if 0 <= range[2] and range[2] <= 0x1F then
				table.remove(ranges, i)
			else
				range[1] = 0x20
			end
		end
	end
end
-- Entry point: returns the character-set body (for use inside [...] or
-- [^...]) matching every code point that has a given value in a submodule of
-- Module:Unicode data.
--
-- frame.args.module: name of the submodule, appended to "Module:Unicode data/".
-- frame.args.value:  value to search for; compared with == against the data.
--   NOTE(review): arguments arriving from wikitext are presumably strings, so
--   only string-valued data would match -- confirm against the data modules.
--
-- Raises an error when either parameter is missing, or when |module= is empty.
-- Code points U+0000-U+001F are dropped from the result, and the ranges are
-- emitted in ascending order.
function export.make_pattern(frame)
	local module_name = frame.args.module
	if not module_name or module_name == "" then
		error("Provide name of submodule of Module:Unicode data in |module= parameter.")
	end

	local value = frame.args.value
	if not value then
		error("Provide value to search for in |value= parameter.")
	end

	local ranges = export.ranges_per_value(require("Module:Unicode data/" .. module_name), value)
	-- ranges_per_value gathers singles via pairs(), whose order is unspecified;
	-- sort first so the output is deterministic and sanitize_ranges gets
	-- ordered input.
	sort_ranges(ranges)
	sanitize_ranges(ranges)
	return make_pattern(ranges, false)
end
-- Entry point: lists, for every value found in a submodule of
-- Module:Unicode data, the character-set body that matches it.
--
-- frame.args.module: name of the submodule, appended to "Module:Unicode data/".
--
-- Raises an error when |module= is missing.
-- Returns wikitext with one "* value: <code>...</code>" bullet per value, in
-- the order given by Module:table's sortedPairs; code points are written as
-- numeric character references and each value's ranges are sorted ascending.
function export.show_all_patterns(frame)
	local module_name = frame.args.module
	if not module_name then
		error("Provide name of submodule of Module:Unicode data in |module=.")
	end

	local data = require("Module:Unicode data/" .. module_name)
	local grouped = export.all_ranges_per_value(data)
	for _, list in pairs(grouped) do
		sort_ranges(list)
	end

	local sorted_pairs = require("Module:table").sortedPairs
	local bullets = Array()
	for value, list in sorted_pairs(grouped) do
		bullets:insert("\n* " .. value .. ": ")
		bullets:insert("<code>" .. make_pattern(list, true) .. "</code>")
	end
	return bullets:concat()
end
return export