-- Module:FileInfo/Reports

-- This function is only used on Template:FileInfo/Reports,
-- so it lives in its own module instead of Module:File or Module:FileInfo.

-- This way we can change it without triggering a massive job queue
-- for all the pages linked by those high-usage modules.

-- Module table; every public function of this module is attached to it.
local p = {}

local utilsCargo = require("Module:UtilsCargo") local utilsLayout = require("Module:UtilsLayout") local utilsMarkup = require("Module:UtilsMarkup") local utilsPage = require("Module:UtilsPage") local utilsTable = require("Module:UtilsTable")

-- Builds a table of the most-used file subjects that have no "Images of <subject>" category yet.
--
-- Queries Cargo for the 100 most-used subjects, then uses DPL to determine which ones don't exist as categories yet.
-- Repeats for the next 100 subjects, and so on until there are ~100 table rows or no more subjects to process.
-- The row limit is only checked after a whole batch has been processed, so the result may exceed MAX_ROWS.
--
-- @param frame Unused; accepted so the function can be called like any other exported module function.
-- @return The value produced by utilsLayout.table for the collected rows (presumably table markup -- confirm in Module:UtilsLayout).
function p.MostWantedSubjectCategories(frame)
	local BATCH_SIZE = 100
	local MAX_ROWS = 100
	local offset = 0
	local rows = {}
	local listedCategories = {}
	local cargoResults
	repeat
		cargoResults = utilsCargo.query("Files, Files__subject", "Files._pageName=page, Files__subject._value=subject, COUNT(*)=count", {
			groupBy = "_value",
			orderBy = "COUNT(*) DESC",
			join = "Files._ID = Files__subject._rowID",
			where = " Files__subject._value != ''",
			limit = BATCH_SIZE,
			offset = offset
		})
		offset = offset + BATCH_SIZE

		-- An empty batch contributes no rows, so skip the DPL query entirely in that case.
		if #cargoResults > 0 then
			local dplArgs = {
				namespace = "Category",
				titlematch = utilsTable.map(cargoResults, function(result)
					return "Images of "..result.subject
				end),
				redirects = "include", -- some "Images of X" categories are redirects to synonymous terms (see Category:Synonymous Files)
			}
			local existingCategories = utilsPage.dpl(dplArgs)
			-- Inverted so that each existing category name can be looked up by key.
			local existingCategoryLookup = utilsTable.invert(existingCategories)
			for _, result in ipairs(cargoResults) do
				local cat = "Category:Images of "..result.subject
				if not listedCategories[cat] and not existingCategoryLookup[cat] then
					table.insert(rows, {
						utilsMarkup.link(cat),
						result.count,
						utilsMarkup.link(result.page),
					})
					-- because of the way we batch the cargo queries, we need to keep track of results that were already added
					listedCategories[cat] = true
				end
			end
		end
	until #rows >= MAX_ROWS or #cargoResults == 0

	return utilsLayout.table({
		sortable = true,
		headers = {"Category", "Count", "Sample File"},
		rows = rows
	})
end

-- Hand the module table (and the functions attached to it) back to whoever loads this module.
return p