Module:Category handler: Difference between revisions
Jump to navigation
Jump to search
add blacklist check |
add the rest of the features, improve the comments |
||
Line 1: | Line 1: | ||
-- Configuration data. | ---------------------------------------------------------------------- | ||
-- -- | |||
-- CATEGORY HANDLER -- | |||
-- -- | |||
-- This module implements the {{category handler}} template -- | |||
-- in Lua, with a few improvements: all namespaces and all -- | |||
-- namespace aliases are supported, and namespace names are -- | |||
-- detected automatically for the local wiki. This module -- | |||
-- requires [[Module:Namespace detect]] to be available on -- | |||
-- the local wiki. It can be configured for different wikis -- | |||
-- by altering the values in the "cfg" table. -- | |||
-- -- | |||
---------------------------------------------------------------------- | |||
---------------------------------------------------------------------- | |||
-- Configuration data -- | |||
-- Language-specific parameter names and values can be set -- | |||
-- here. -- | |||
---------------------------------------------------------------------- | |||
local cfg = {} | local cfg = {} | ||
-- cfg.nocat is the parameter name to suppress categorisation. | |||
-- cfg.nocatTrue is the value to suppress categorisation, and | |||
-- cfg.nocatFalse is the value to both categorise and to skip the | |||
-- blacklist check. | |||
cfg.nocat = 'nocat' | cfg.nocat = 'nocat' | ||
cfg.nocatTrue = 'true' | |||
cfg.nocatFalse = 'false' | |||
-- The parameter name for the legacy "categories" parameter. | |||
cfg.categories = 'categories' | cfg.categories = 'categories' | ||
cfg.categoriesYes = 'yes' | |||
-- The parameter name for the legacy "category2" parameter. This | |||
-- skips the blacklist if set to the cfg.category2Yes value, and | |||
-- suppresses categorisation if present but equal to anything other | |||
-- than cfg.category2Yes. | |||
cfg.category2 = 'category2' | |||
cfg.category2Yes = 'yes' | |||
-- cfg.subpage is the parameter name to specify how to behave on | |||
-- subpages. cfg.subpageNo is the value to specify to not | |||
-- categorise on subpages; cfg.only is the value to specify to only | |||
-- categorise on subpages. | |||
cfg.subpage = 'subpage' | cfg.subpage = 'subpage' | ||
cfg. | cfg.subpageNo = 'no' | ||
cfg. | cfg.subpageOnly = 'only' | ||
-- The parameter for data to return in all namespaces. | |||
cfg.all = 'all' | cfg.all = 'all' | ||
cfg. | |||
-- The parameter name for data to return if no data is specified for | |||
-- the namespace that is detected. This must be the same as the | |||
-- cfg.other parameter in [[Module:Namespace detect]]. | |||
cfg.other = 'other' | cfg.other = 'other' | ||
-- The parameter name used to specify a page other than the current | |||
-- page; used for testing and demonstration. This must be the same | |||
-- as the cfg.page parameter in [[Module:Namespace detect]]. | |||
cfg.page = 'page' | |||
-- The categorisation blacklist. Pages that match Lua patterns in this | -- The categorisation blacklist. Pages that match Lua patterns in this | ||
Line 34: | Line 84: | ||
} | } | ||
-- Module | -- This is a table of namespaces to categorise by default. | ||
cfg.defaultNamespaces = { | |||
0, -- Main | |||
6, -- File | |||
12, -- Help | |||
14 -- Category | |||
} | |||
---------------------------------------------------------------------- | |||
-- End configuration data -- | |||
---------------------------------------------------------------------- | |||
-- Get dependent modules and declare the table of functions that we will | |||
-- return. | |||
local NamespaceDetect = require('Module:Namespace detect') | |||
local p = {} | local p = {} | ||
-- | ---------------------------------------------------------------------- | ||
-- | -- Local functions -- | ||
-- | -- The following are internal functions, which we do not want -- | ||
-- to be accessible from other modules. -- | |||
---------------------------------------------------------------------- | |||
-- Find whether we need to return a category or not. | -- Find whether we need to return a category or not. | ||
local function needsCategory( pageObject ) | local function needsCategory( pageObject, args ) | ||
if not pageObject then return end | -- If there is no pageObject available, then that either means that we are over | ||
if args[cfg.nocat] == | -- the expensive function limit or that the title specified was invalid. Invalid | ||
or ( args[cfg.category2] and args[cfg.category2] ~= | -- titles will probably only be a problem during testing, so choose the best | ||
or ( args[cfg.subpage] == | -- default for being over the expensive function limit, i.e. categorise the page. | ||
or ( args[cfg.subpage] == | if not pageObject then | ||
return true | |||
end | |||
-- Only categorise if the relevant options are set. | |||
if args[cfg.nocat] == cfg.nocatTrue | |||
or ( args[cfg.category2] and args[cfg.category2] ~= cfg.category2Yes ) | |||
or ( args[cfg.subpage] == cfg.subpageNo and pageObject.isSubpage ) | |||
or ( args[cfg.subpage] == cfg.subpageOnly and not pageObject.isSubpage ) then | |||
return false | return false | ||
else | else | ||
Line 72: | Line 128: | ||
-- Find whether we need to check the blacklist or not. | -- Find whether we need to check the blacklist or not. | ||
local function needsBlacklistCheck() | local function needsBlacklistCheck( args ) | ||
if args[cfg.nocat] == | if args[cfg.nocat] == cfg.nocatFalse | ||
or args[cfg.categories] == | or args[cfg.categories] == cfg.categoriesYes | ||
or args[cfg.category2] == | or args[cfg.category2] == cfg.category2Yes then | ||
return false | return false | ||
else | else | ||
Line 85: | Line 141: | ||
-- string searched is the namespace plus the title, including subpages. | -- string searched is the namespace plus the title, including subpages. | ||
-- Returns true if there is a match, otherwise returns false. | -- Returns true if there is a match, otherwise returns false. | ||
local function findBlacklistMatch(pageObject) | local function findBlacklistMatch( pageObject ) | ||
if not pageObject then return end | if not pageObject then return end | ||
Line 106: | Line 162: | ||
end | end | ||
local function _main() | -- Find whether any namespace parameters have been specified. | ||
local pageObject = getPageObject() | -- Mappings is the table of parameter mappings taken from | ||
if not needsCategory( pageObject ) then return end | -- [[Module:Namespace detect]]. | ||
if needsBlacklistCheck() then | local function nsParamsExist( mappings, args ) | ||
return | if args[cfg.all] or args[cfg.other] then | ||
return true | |||
end | |||
for ns, params in pairs( mappings ) do | |||
for i, param in ipairs( params ) do | |||
if args[param] then | |||
return true | |||
end | |||
end | |||
end | |||
return false | |||
end | |||
-- The main structure of the module. Checks whether we need to categorise, | |||
-- and then passes the relevant arguments to [[Module:Namespace detect]]. | |||
local function _main( args ) | |||
-- Get the page object and argument mappings from | |||
-- [[Module:Namespace detect]], to save us from having to rewrite the | |||
-- code. | |||
local pageObject = NamespaceDetect.getPageObject() | |||
local mappings = NamespaceDetect.getParamMappings() | |||
-- Check if we need a category or not, and return nothing if not. | |||
if not needsCategory( pageObject, args ) then return end | |||
local ret = '' -- The string to return. | |||
if needsBlacklistCheck( args ) and not findBlacklistMatch( pageObject ) then | |||
if not nsParamsExist( mappings, args ) then | |||
-- No namespace parameters exist; basic usage. | |||
local ndargs = {} | |||
for _, nsid in ipairs( cfg.defaultNamespaces ) do | |||
ndargs[ mw.ustring.lower( mw.site.namespaces[ nsid ].name ) ] = args[1] | |||
end | |||
ndargs.page = args.page | |||
local ndresult = NamespaceDetect.main( ndargs ) | |||
if ndresult then | |||
ret = ret .. ndresult | |||
end | |||
else | |||
-- Namespace parameters exist; advanced usage. | |||
-- If the all parameter is specified, return it. | |||
if args.all then | |||
ret = ret .. args.all | |||
end | |||
-- Get the arguments to pass to [[Module:Namespace detect]]. | |||
local ndargs = {} | |||
for ns, params in pairs( mappings ) do | |||
for _, param in ipairs( params ) do | |||
ndargs[param] = args[param] or args[cfg.other] or nil | |||
end | |||
end | |||
if args.other then | |||
ndargs.other = args.other | |||
end | |||
if args.page then | |||
ndargs.page = args.page | |||
end | |||
local data = NamespaceDetect.main( ndargs ) | |||
-- Work out what to return based on the result of the namespace | |||
-- detect call. | |||
local datanum = tonumber( data ) | |||
if type( datanum ) == 'number' then | |||
-- "data" is a number, so return that positional parameter. | |||
-- Remove non-positive integer values, as only positive integers | |||
-- from 1-10 were used with the old template. | |||
if datanum > 0 | |||
and math.floor( datanum ) == datanum | |||
and args[datanum] then | |||
ret = ret .. args[ datanum ] | |||
end | |||
else | |||
-- "data" is not a number, so return it as it is. | |||
if type(data) == 'string' then | |||
ret = ret .. data | |||
end | |||
end | |||
end | |||
end | end | ||
return ret | |||
end | end | ||
-- | ---------------------------------------------------------------------- | ||
function p.main(frame) | -- Global functions -- | ||
-- The following functions are global, because we want them -- | |||
-- to be accessible from #invoke and from other Lua modules. -- | |||
-- At the moment only the main function is here. It processes -- | |||
-- the arguments and passes them to the _main function. -- | |||
---------------------------------------------------------------------- | |||
function p.main( frame ) | |||
-- If called via #invoke, use the args passed into the invoking | -- If called via #invoke, use the args passed into the invoking | ||
-- template, or the args passed to #invoke if any exist. Otherwise | -- template, or the args passed to #invoke if any exist. Otherwise | ||
Line 129: | Line 271: | ||
origArgs = frame | origArgs = frame | ||
end | end | ||
-- Trim whitespace and remove blank arguments for the following args: | -- Trim whitespace and remove blank arguments for the following args: | ||
-- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page". | -- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page". | ||
for k, v in pairs(origArgs) do | local args = {} | ||
for k, v in pairs( origArgs ) do | |||
v = mw.text.trim(v) -- Trim whitespace. | v = mw.text.trim(v) -- Trim whitespace. | ||
if type(k) == 'number' | if type(k) == 'number' | ||
Line 161: | Line 290: | ||
end | end | ||
-- Lower-case "nocat", "categories", "category2", and "subpage". | -- Lower-case "nocat", "categories", "category2", and "subpage". These | ||
-- parameters are put in lower case whenever they appear in the old | |||
-- template, so we can just do it once here and save ourselves some work. | |||
local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage } | local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage } | ||
for _, v in ipairs( lowercase ) do | for _, v in ipairs( lowercase ) do | ||
Line 169: | Line 300: | ||
end | end | ||
return _main() | return _main( args ) | ||
end | end | ||
return p | return p |
Revision as of 09:36, 1 July 2013
Documentation for this module may be created at Module:Category handler/doc
----------------------------------------------------------------------
-- --
-- CATEGORY HANDLER --
-- --
-- This module implements the {{category handler}} template --
-- in Lua, with a few improvements: all namespaces and all --
-- namespace aliases are supported, and namespace names are --
-- detected automatically for the local wiki. This module --
-- requires [[Module:Namespace detect]] to be available on --
-- the local wiki. It can be configured for different wikis --
-- by altering the values in the "cfg" table. --
-- --
----------------------------------------------------------------------
----------------------------------------------------------------------
-- Configuration data --
-- Language-specific parameter names and values can be set --
-- here. --
----------------------------------------------------------------------
-- Every configurable parameter name and value lives in this single table,
-- so the module can be adapted to another wiki without touching the logic.
local cfg = {
    -- Name of the parameter that controls suppression of categorisation.
    nocat = 'nocat',
    -- Value of that parameter which suppresses categorisation.
    nocatTrue = 'true',
    -- Value of that parameter which both categorises and skips the
    -- blacklist check.
    nocatFalse = 'false',

    -- Name of the legacy "categories" parameter, and the value of it that
    -- skips the blacklist check.
    categories = 'categories',
    categoriesYes = 'yes',

    -- Name of the legacy "category2" parameter. Setting it to the
    -- category2Yes value skips the blacklist; setting it to anything else
    -- suppresses categorisation.
    category2 = 'category2',
    category2Yes = 'yes',

    -- Name of the parameter that specifies how to behave on subpages.
    -- subpageNo is the value meaning "do not categorise on subpages";
    -- subpageOnly is the value meaning "only categorise on subpages".
    subpage = 'subpage',
    subpageNo = 'no',
    subpageOnly = 'only',

    -- Name of the parameter holding data to return in all namespaces.
    all = 'all',

    -- Name of the parameter holding data to return if no data is specified
    -- for the namespace that is detected. This must be the same as the
    -- cfg.other parameter in [[Module:Namespace detect]].
    other = 'other',

    -- Name of the parameter used to specify a page other than the current
    -- page; used for testing and demonstration. This must be the same as
    -- the cfg.page parameter in [[Module:Namespace detect]].
    page = 'page',

    -- The categorisation blacklist. Pages that match Lua patterns in this
    -- list will not be categorised unless the appropriate options are set.
    -- If the namespace name has a space in, it must be written with an
    -- underscore, e.g. "Wikipedia_talk". Other parts of the title can have
    -- either underscores or spaces.
    blacklist = {
        '^Main Page$', -- don't categorise the main page.

        -- Don't categorise the following pages or their subpages.
        '^Wikipedia:Cascade%-protected items$',
        '^Wikipedia:Cascade%-protected items/.*$',
        '^User:UBX$', -- The userbox "template" space.
        '^User:UBX/.*$',
        '^User_talk:UBX$',
        '^User_talk:UBX/.*$',

        -- Don't categorise subpages of these pages, but allow
        -- categorisation of the base page.
        '^Wikipedia:Template messages/.*$',

        '/[aA]rchive' -- Don't categorise archives.
    },

    -- Namespace numbers that are categorised by default, i.e. when no
    -- namespace parameters are supplied.
    defaultNamespaces = {
        0, -- Main
        6, -- File
        12, -- Help
        14 -- Category
    }
}
----------------------------------------------------------------------
-- End configuration data --
----------------------------------------------------------------------
----------------------------------------------------------------------
-- End configuration data --
----------------------------------------------------------------------
-- Get dependent modules and declare the table of functions that we will
-- return.
local NamespaceDetect = require('Module:Namespace detect')
local p = {}
----------------------------------------------------------------------
-- Local functions --
-- The following are internal functions, which we do not want --
-- to be accessible from other modules. --
----------------------------------------------------------------------
-- Decide whether the page should be categorised at all, based on the
-- nocat, category2 and subpage options. Returns true to categorise and
-- false to suppress.
local function needsCategory( pageObject, args )
    -- A missing pageObject means either that we are over the expensive
    -- function limit or that the title specified was invalid. Invalid
    -- titles will probably only be a problem during testing, so pick the
    -- best default for the expensive-limit case: categorise the page.
    if not pageObject then
        return true
    end

    -- Explicit request to suppress categorisation.
    if args[cfg.nocat] == cfg.nocatTrue then
        return false
    end

    -- The legacy category2 parameter suppresses categorisation when it is
    -- present with any value other than the "yes" value.
    local category2 = args[cfg.category2]
    if category2 and category2 ~= cfg.category2Yes then
        return false
    end

    -- Subpage handling: "no" excludes subpages, "only" excludes everything
    -- that is not a subpage.
    local subpageMode = args[cfg.subpage]
    if subpageMode == cfg.subpageNo and pageObject.isSubpage then
        return false
    end
    if subpageMode == cfg.subpageOnly and not pageObject.isSubpage then
        return false
    end

    return true
end
-- Decide whether the blacklist has to be consulted. Returns false when any
-- of the override options (nocat set to its "false" value, or categories /
-- category2 set to their "yes" values) is present, and true otherwise.
local function needsBlacklistCheck( args )
    local overridden = args[cfg.nocat] == cfg.nocatFalse
        or args[cfg.categories] == cfg.categoriesYes
        or args[cfg.category2] == cfg.category2Yes
    return not overridden
end
-- Searches the blacklist to find a match with the page object. The
-- string searched is the namespace plus the title, including subpages.
-- Returns true if there is a match, otherwise returns false. When no page
-- object is available there is nothing to match against, so false is
-- returned in that case as well.
local function findBlacklistMatch( pageObject )
    if not pageObject then
        return false
    end

    -- Build the title to check: "Namespace:Title", or just "Title" when
    -- the namespace text is the blank string.
    local title = pageObject.text
    local nsText = pageObject.nsText
    if #nsText > 0 then
        title = nsText .. ':' .. title
    end

    -- Check each blacklist pattern in turn; the first hit is enough.
    for _, pattern in ipairs( cfg.blacklist ) do
        if mw.ustring.match( title, pattern ) then
            return true
        end
    end
    return false
end
-- Report whether the caller supplied any namespace-related parameter:
-- the "all" parameter, the "other" parameter, or any of the parameter
-- names listed in mappings. Mappings is the table of parameter mappings
-- taken from [[Module:Namespace detect]]; each of its values is read here
-- as a sequence of parameter names.
local function nsParamsExist( mappings, args )
    if not ( args[cfg.all] or args[cfg.other] ) then
        for _, paramNames in pairs( mappings ) do
            for _, paramName in ipairs( paramNames ) do
                if args[paramName] then
                    return true
                end
            end
        end
        return false
    end
    return true
end
-- The main structure of the module. Checks whether we need to categorise,
-- and then passes the relevant arguments to [[Module:Namespace detect]].
--
-- args is the processed argument table built by p.main.
-- Returns nothing when categorisation is suppressed by the options,
-- the blank string when the page is blacklisted, and otherwise the text
-- selected for the detected namespace (prefixed by the "all" text, if any).
local function _main( args )
    -- Get the page object and argument mappings from
    -- [[Module:Namespace detect]], to save us from having to rewrite the
    -- code.
    -- NOTE(review): getPageObject is called without the page argument;
    -- confirm that it picks up the test/demo page some other way.
    local pageObject = NamespaceDetect.getPageObject()
    local mappings = NamespaceDetect.getParamMappings()

    -- Check if we need a category or not, and return nothing if not.
    if not needsCategory( pageObject, args ) then return end

    -- The blacklist only suppresses output when it actually has to be
    -- checked AND the page matches it. When the caller has asked to skip
    -- the blacklist we must still categorise, so fall through.
    if needsBlacklistCheck( args ) and findBlacklistMatch( pageObject ) then
        return ''
    end

    local ret = '' -- The string to return.

    if not nsParamsExist( mappings, args ) then
        -- No namespace parameters exist; basic usage. Offer the first
        -- positional parameter for each of the default namespaces.
        -- NOTE(review): mw.site.namespaces[0].name is usually the blank
        -- string, so the key produced for the main namespace may not be
        -- the parameter name Namespace detect expects - confirm.
        local ndargs = {}
        for _, nsid in ipairs( cfg.defaultNamespaces ) do
            ndargs[ mw.ustring.lower( mw.site.namespaces[ nsid ].name ) ] = args[1]
        end
        ndargs[cfg.page] = args[cfg.page]
        local ndresult = NamespaceDetect.main( ndargs )
        if ndresult then
            ret = ret .. ndresult
        end
        return ret
    end

    -- Namespace parameters exist; advanced usage.
    -- If the all parameter is specified, return it.
    if args[cfg.all] then
        ret = ret .. args[cfg.all]
    end

    -- Get the arguments to pass to [[Module:Namespace detect]]. Every
    -- known namespace parameter is filled in, falling back to the "other"
    -- value when the caller did not supply that parameter.
    local ndargs = {}
    for _, params in pairs( mappings ) do
        for _, param in ipairs( params ) do
            ndargs[param] = args[param] or args[cfg.other]
        end
    end
    if args[cfg.other] then
        ndargs[cfg.other] = args[cfg.other]
    end
    if args[cfg.page] then
        ndargs[cfg.page] = args[cfg.page]
    end
    local data = NamespaceDetect.main( ndargs )

    -- Work out what to return based on the result of the namespace
    -- detect call.
    local datanum = tonumber( data )
    if datanum then
        -- "data" is a number, so return that positional parameter.
        -- Ignore anything that is not a positive integer, as only positive
        -- integers from 1-10 were used with the old template.
        if datanum > 0
            and math.floor( datanum ) == datanum
            and args[datanum] then
            ret = ret .. args[ datanum ]
        end
    elseif type( data ) == 'string' then
        -- "data" is not a number, so return it as it is.
        ret = ret .. data
    end

    return ret
end
----------------------------------------------------------------------
-- Global functions --
-- The following functions are global, because we want them --
-- to be accessible from #invoke and from other Lua modules. --
-- At the moment only the main function is here. It processes --
-- the arguments and passes them to the _main function. --
----------------------------------------------------------------------

-- Entry point. "frame" is either the frame object supplied by #invoke or,
-- when called from another Lua module, a plain table of arguments (nil is
-- treated as an empty table). Returns whatever _main returns for the
-- cleaned-up arguments.
function p.main( frame )
    -- If called via #invoke, use the args passed into the invoking
    -- template, or the args passed to #invoke if any exist. Otherwise
    -- assume args are being passed directly in.
    local origArgs
    if frame and frame == mw.getCurrentFrame() then
        origArgs = frame:getParent().args
        -- A single iteration of pairs is used purely as an "is frame.args
        -- non-empty?" test.
        for _ in pairs( frame.args ) do
            origArgs = frame.args
            break
        end
    else
        origArgs = frame or {}
    end

    -- Parameters (besides the positional ones) whose blank values are
    -- discarded: "nocat", "categories", "subpage", and "page".
    local dropWhenBlank = {
        [cfg.nocat] = true,
        [cfg.categories] = true,
        [cfg.subpage] = true,
        [cfg.page] = true
    }

    -- Trim whitespace from every string argument, and remove blank
    -- arguments for 1, 2, 3 etc. and for the parameters listed above.
    -- Non-string values, which can only arrive from another Lua module,
    -- are passed through untouched rather than handed to mw.text.trim.
    local args = {}
    for k, v in pairs( origArgs ) do
        if type( v ) == 'string' then
            v = mw.text.trim( v )
        end
        local removable = type( k ) == 'number' or dropWhenBlank[k]
        if not ( removable and v == '' ) then
            args[k] = v
        end
    end

    -- Lower-case "nocat", "categories", "category2", and "subpage". These
    -- parameters are put in lower case whenever they appear in the old
    -- template, so we can just do it once here and save ourselves some work.
    local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage }
    for _, name in ipairs( lowercase ) do
        if type( args[name] ) == 'string' then
            args[name] = mw.ustring.lower( args[name] )
        end
    end

    return _main( args )
end
return p