Module:Cite Wikidata/sandbox

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
Lua

CodeDiscussionEditHistoryLinksLink count Subpages:DocumentationTestsResultsSandboxLive code All modules

Code

--[[  
  __  __           _       _         ____ _ _        __        ___ _    _     _       _        
 |  \/  | ___   __| |_   _| | ___ _ / ___(_) |_ ___  \ \      / (_) | _(_) __| | __ _| |_ __ _ 
 | |\/| |/ _ \ / _` | | | | |/ _ (_) |   | | __/ _ \  \ \ /\ / /| | |/ / |/ _` |/ _` | __/ _` |
 | |  | | (_) | (_| | |_| | |  __/_| |___| | ||  __/   \ V  V / | |   <| | (_| | (_| | || (_| |
 |_|  |_|\___/ \__,_|\__,_|_|\___(_)\____|_|\__\___|    \_/\_/  |_|_|\_\_|\__,_|\__,_|\__\__,_|

This module is intended for creating citation templates based on wikidata items.

Please do not modify this code without applying the changes first at Module:Cite Wikidata/sandbox and testing 
at Module:Cite Wikidata/sandbox/testcases and Module talk:Cite Wikidata/sandbox/testcases.

Authors and maintainers:
* User:Jarekt 
]]
require('strict') -- used for debugging purposes as it detects cases of unintended global variables
local ISOdate = require('Module:ISOdate')._ISOdate -- date localization
local core    = require('Module:Core')

-- =======================================
-- === Local Functions ===================
-- =======================================

-- ===========================================================================
-- === Snaks are parts of Wikidata statements and this function converts   ===
-- === some of them to text                                                ===
-- === INPUTS:                                                             ===
-- ===  * snak - snak data structure                                       ===
-- ===  * lang - language id of the desired language                       ===
-- === OUTPUT:                                                             ===
-- ===  * value of the snak. Different value will be returned for each     ===
-- ===    snak type:                                                       ===
-- ===     - "somevalue" / "novalue" snaks -> the snaktype string          ===
-- ===     - wikibase-item -> result of core.getLabel for the item id      ===
-- ===     - quantity      -> amount converted to a number                 ===
-- ===     - time          -> YYYY-MM-DD, YYYY-MM or YYYY for common dates,===
-- ===                        otherwise "<time>/<precision>"               ===
-- ===     - anything else -> raw datavalue (string or table)              ===
-- ===========================================================================
local function snackValue(snak, lang)
	if (snak.snaktype == "somevalue") or (snak.snaktype == "novalue") then
		return snak.snaktype
	end
	local val   = snak.datavalue.value
	local dtype = snak.datatype
	if (dtype == 'wikibase-item') then -- data type is a wikibase item
		return core.getLabel(val.id, lang)
	elseif (dtype == 'quantity') then
		return tonumber(val.amount)
	elseif (dtype == 'time') then -- only most common dates are handled
		local precision = val.precision
		-- The short form is only meaningful for year (9), month (10) and day (11+)
		-- precision; for coarser precisions (decade, century, ...) the trim
		-- arithmetic below would yield a truncated or nonsensical string.
		if (val.calendarmodel == 'http://www.wikidata.org/entity/Q1985727')
			and (precision >= 9)
			and (string.sub(val.time, 1, 1) == '+') then
			-- day (11)->11, month (10)->8, year (9)->5
			local trim = 3*math.min(precision, 11) - 22
			-- time strings are plain ASCII so byte based string.sub is sufficient
			return string.sub(val.time, 2, trim) -- YYYY-MM-DD, YYYY-MM or YYYY depending on precision
		end
		return val.time .. '/' .. precision -- return full date
	end
	-- if dtype == "commonsMedia" or dtype == "external-id" or dtype == "string" or dtype == "url" then
	return val
end

-- ===========================================================================
-- === Get property value. Each "best" statement of the property is        ===
-- === converted with snackValue(): strings (commonsMedia, external-id,    ===
-- === string, url) are returned as is, wikibase-items are returned as     ===
-- === whatever core.getLabel gives for language "lang", dates as ISO      ===
-- === strings (YYYY-MM-DD, YYYY-MM or YYYY) or "<time>/<precision>".      ===
-- === INPUTS:                                                             ===
-- ===  * entity - wikidata or SDC entity                                  ===
-- ===  * prop   - wikidata property number                                ===
-- ===  * outputType - outputType=='one' returns only the first "best"     ===
-- ===                 value, anything else returns a list of all values   ===
-- ===  * lang   - language id of the desired language                     ===
-- === OUTPUT:                                                             ===
-- ===  * nil if the property has no usable values                         ===
-- ===  * for monolingualtext properties: the text picked by               ===
-- ===    core.langSwitch for "lang", or the last text seen                ===
-- ===  * otherwise the first value ('one') or a table of all values       ===
-- ===========================================================================
local function getProperty(entity, prop, outputType, lang)
	local output = {}
	local lastText -- text of the last monolingualtext value seen, if any
	if entity.claims and entity.claims[prop] then
		for _, statement in ipairs( entity:getBestStatements( prop )) do
			local val = snackValue(statement.mainsnak, lang)
			-- Only table values can carry a language code; numbers (quantity)
			-- must not be indexed and nil values are simply skipped.
			if type(val) == 'table' and val.language then -- datatype=='monolingualtext'
				lastText = val.text -- look for multiple values each with a language code
				output[val.language] = lastText
			elseif val ~= nil then
				table.insert(output, val)
			end
		end
	end
	if lastText then -- 'monolingualtext' type property with at least one value
		return core.langSwitch(output, lang) or lastText -- translated value or the last one
	elseif #output == 0 then
		return nil
	elseif outputType == 'one' then
		return output[1]
	else
		return output
	end
end

-- ===========================================================================
-- === Harvest properties with qualifiers                                  ===
-- === INPUTS:                                                             ===
-- ===  * entity     - wikidata or SDC entity                              ===
-- ===  * prop       - wikidata property number                            ===
-- ===  * qualifiers - list of qualifier property numbers to collect       ===
-- ===  * lang       - language id of the desired language                 ===
-- === OUTPUT:                                                             ===
-- ===  * list (possibly empty) with one table per "best" statement; each  ===
-- ===    table has field "value" (main value) and one field per requested ===
-- ===    qualifier that is present (first qualifier value only)           ===
-- ===========================================================================
local function getPropertyQual(entity, prop, qualifiers, lang)
	local Output = {}
	if entity.claims and entity.claims[prop] then
		for _, statement in ipairs( entity:getBestStatements( prop )) do
			local output = {} -- table with fields: value, P... (qualifiers)
			output.value = snackValue(statement.mainsnak, lang)
			if statement.qualifiers then
				for _, qual in ipairs( qualifiers ) do
					local snaks = statement.qualifiers[qual]
					if snaks and snaks[1] then
						output[qual] = snackValue(snaks[1], lang)
					end
				end
			end
			-- exactly one entry per statement, regardless of how many
			-- qualifiers were requested
			table.insert(Output, output)
		end
	end
	return Output
end

-- ===========================================================================
-- === Harvest wikidata properties needed to build a citation              ===
-- === INPUTS:                                                             ===
-- ===  * entity - wikidata entity                                         ===
-- ===  * lang   - language id of the desired language                     ===
-- === OUTPUT:                                                             ===
-- ===  * table with (optional) fields: inc_date, pub_date, date,          ===
-- ===    publisher, series_title, location, edition_of, journal, license, ===
-- ===    edition, volume, issue, title, subtitle, ident, lang, editor,    ===
-- ===    author                                                           ===
-- ===========================================================================
local function harvest_wikidata(entity, lang)
	local data = {}
	-- localized list separator (comma followed by a word separator)
	local comma = mw.message.new( "comma-separator"):inLanguage(lang):plain()
	           .. mw.message.new( "Word-separator" ):inLanguage(lang):plain()

	-- get publication date and inception date
	local dateProps = {P571='inc_date', P577='pub_date'}
	for prop, field in pairs( dateProps ) do
		local dateStr = getProperty(entity, prop, 'one', lang)
		-- only handle day, month and year precision dates (YYYY-MM-DD, YYYY-MM, YYYY);
		-- the digit/hyphen check rejects "somevalue" / "novalue" placeholders
		if type(dateStr)=='string' and #dateStr<=10 and dateStr:match('^[%d%-]+$') then
			data[field] = ISOdate(dateStr, lang, '', 'dtstart', '100-999')
		end
	end
	data.date = data.pub_date or data.inc_date

	-- harvest string, Q-code, text properties where a single value is expected
	local singleProps = { P123='publisher',  P179='series_title', P291='location',   --P872='printer',
					   P629='edition_of', P1433='journal',     P275='license',    --P953='url'
					   P393='edition',    P478='volume',       P433='issue',
					   P1476 = 'title',   P1680='subtitle'}
	for prop, field in pairs( singleProps ) do
		data[field] = getProperty(entity, prop, 'one', lang)
	end
	data.title = data.title or core.getLabel(entity.id, lang) -- if title not provided then use label

	-- get External identifiers; kept as an ordered list so that the order of
	-- identifiers in the output does not depend on hash-table iteration order
	local T = {}
	local URLs = {
		{'P356', 'doi: [https://dx.doi.org/%s %s]'},
		{'P698', 'PubMed ID: [https://www.ncbi.nlm.nih.gov/pubmed/?term=/%s %s]'},
		-- NOTE(review): this PMC link pattern looks unusual - confirm it resolves
		{'P932', 'PubMed Central ID: [https://www.ncbi.nlm.nih.gov/pmc/articles/PMC?term=/%s %s]'},
		{'P212', 'ISBN [[Special:BookSources/%s|%s]]'},
	}
	for _, row in ipairs( URLs ) do
		local id = getProperty(entity, row[1], 'one', lang)
		if id then
			id = tostring(id) -- string.format('%s') does not accept tables
			table.insert(T, '<small>' .. string.format( row[2], id, id) .. '</small>')
		end
	end
	data.ident = table.concat(T, comma)

	-- harvest properties where multiple values are expected
	local multiProps = { P407='lang', P98='editor'}
	for prop, field in pairs( multiProps ) do
		local ids = getProperty(entity, prop, 'all', lang)
		if type(ids) == 'table' then
			data[field] = table.concat(ids, comma)
		elseif ids then
			data[field] = tostring(ids)
		end
	end

	-- harvest author properties while using 'P1545' (series ordinal) as sort key;
	-- P50 is processed before P2093 so that the fallback order is deterministic
	local AuthorTable = {}
	for _, prop in ipairs( {'P50', 'P2093'} ) do -- author, author name string
		local authors = getPropertyQual(entity, prop, {'P1545'}, lang)
		if authors then
			for _, author in ipairs(authors) do
				-- The sort key must always be a number: mixing strings and numbers
				-- makes table.sort raise, and string keys would sort "10" before "2".
				-- If there is no numeric P1545 then keep the original order.
				author.P1545 = tonumber(author.P1545) or (#AuthorTable+1000)
				table.insert(AuthorTable, author)
			end
		end
	end
	if #AuthorTable>0 then
		table.sort(AuthorTable, function (a, b) return a.P1545<b.P1545 end)
		T = {}
		for _, author in ipairs(AuthorTable) do
			if author.value ~= nil then
				table.insert(T, tostring(author.value))
			end
		end
		data.author = table.concat(T, comma)
	end

	return data
end

-- ===========================================================================
-- === Fetch the citation text template for a given language               ===
-- === INPUTS:                                                             ===
-- ===  * strType - 'book' selects the "book" row, anything else (including ===
-- ===              nil) selects the "journal" row                         ===
-- ===  * lang    - language id passed to mw.ext.data.get                  ===
-- === OUTPUT:                                                             ===
-- ===  * template string (third column of the matching row) with every    ===
-- ===    "|" replaced by "!"                                              ===
-- === Raises an error if the data page or the requested row is missing    ===
-- ===========================================================================
local function getFormatString(strType, lang)
	local key = (strType=='book' and 'book' ) or 'journal'
	local tab = mw.ext.data.get('I18n/Cite Wikidata.tab', lang)
	if type(tab) ~= 'table' or type(tab.data) ~= 'table' then
		error('Cite Wikidata: could not load "I18n/Cite Wikidata.tab"')
	end
	local text
	for _, row in ipairs(tab.data) do
		-- row layout: id, <unused column>, message
		if row[1] == key then
			text = row[3]
			break
		end
	end
	if type(text) ~= 'string' then
		error('Cite Wikidata: no "' .. key .. '" format string in "I18n/Cite Wikidata.tab"')
	end
	-- "|" has no special meaning in Lua patterns; the extra parentheses drop
	-- the replacement count so that exactly one value is returned
	return (mw.ustring.gsub(text, '|', '!'))
end

-- ==================================================
-- === External functions ===========================
-- ==================================================
local p = {} -- export table; functions attached to it form the module's public interface

-- ===========================================================================
-- === Version of the function to be called from other LUA codes
-- ===========================================================================

-- Determine if we should use book or article cite pattern.
-- INPUT:
--  * entity - wikidata entity (must provide :getBestStatements when it has claims)
-- OUTPUT:
--  * 'book', 'article' or nil when the type can not be determined
function p.getCitationType(entity)
	local LUT = {
		Q3331189  = 'book',    -- version, edition, or translation
		Q5173771  = 'book',    -- brochure
		Q7725634  = 'book',    -- literary work
		Q47461344 = 'book',    -- written work
		Q5292     = 'book',    -- encyclopedia
		Q13442814 = 'article', -- scholarly article
		Q191067   = 'article', -- article
		Q55915575 = 'article', -- scholarly work
		Q591041   = 'article'  -- scholarly publication
	}
	local claims = entity.claims
	local iType

	-- Look at the item ids of the "instance of" (P31) statements directly.
	-- getProperty() can not be used here: it converts item values through
	-- core.getLabel, and those results can not be matched against the
	-- Q-id keys of LUT. It also returns nil when there is no P31.
	if claims and claims.P31 then
		for _, statement in ipairs( entity:getBestStatements( 'P31' )) do
			local snak  = statement.mainsnak
			local value = snak and snak.datavalue and snak.datavalue.value
			if type(value) == 'table' and value.id then
				iType = LUT[value.id]
			end
			if iType then
				break
			end
		end
	end

	-- fall back on the presence of properties typical for articles or books
	if iType==nil and claims then
		local P = { edition_of='P629', journal='P1433', issue='P433'}
		if (claims[P.journal] or claims[P.issue]) then
			iType = 'article'
		elseif (claims[P.edition_of]) then
			iType = 'book'
		end
	end

	return iType
end

-- ===========================================================================
-- Build the citation text for a wikidata item.
-- INPUTS:
--  * item - item id (string, resolved with mw.wikibase.getEntity) or an
--           already loaded entity
--  * lang - language id of the desired language
--  * page - currently unused; kept so existing callers keep working
-- OUTPUT:
--  * citation string, or nil when the entity can not be loaded
function p._citeWikidata(item, lang, page)
	local entity = item
	if type(item) == 'string' then
		entity = mw.wikibase.getEntity(item)
	end
	if not entity then
		return nil
	end
	local data = harvest_wikidata(entity, lang)
	data.type  = p.getCitationType(entity)

	-- fetch a text template for a given language; cells of the template are
	-- delimited with "!" (originally "|")
	local text = getFormatString(data.type, lang)

	-- build text of the citation based on the text template
	-- replace "$FIELD" in the "text" with data.field value
	local fields = {'author', 'editor', 'title', 'edition', 'location', 'publisher',
		'date', 'journal', 'volume', 'issue', 'lang', 'subtitle', 'ident'}
	for _, field in ipairs(fields) do
		local value = data[field]
		if value then
			value = tostring(value)
			-- A function is used as the replacement so that "%" characters in
			-- the value (e.g. in titles or identifiers) are inserted literally
			-- instead of being parsed as gsub escape sequences.
			-- NOTE: values containing "!" or "$" still interact with the
			-- clean-up below.
			text = mw.ustring.gsub(text, '%$' .. string.upper(field),
				function() return value end)
		end
	end

	-- delete any cell bracketed with "|" (now changed to "!") that still holds
	-- an unfilled "$FIELD", so we do not break links with a single | (!).
	-- Neighbouring cells share a delimiter, so repeat until nothing changes;
	-- every replacement shortens the text, so the loop terminates.
	local count
	repeat
		text, count = mw.ustring.gsub(text, '![^$!]*%$[^!]*!', '!')
	until count == 0

	text = mw.ustring.gsub(text, '!', '') -- remove all "|" (now changed to "!")
	-- trim leading whitespace, dots and commas and trailing whitespace
	text = mw.ustring.gsub(text, "^[%s.,]*(.-)%s*$", "%1")
	return text
end

-- ===========================================================================
-- === Versions of the function to be called from template namespace
-- ===========================================================================
-- Debugging aid: list all harvested fields of an item and the format string
-- that would be used for it.
-- INPUT:
--  * frame - frame object; arguments "item" and "lang" are read via core.getArgs
-- OUTPUT:
--  * wikitext bullet list with one "field = value" line per harvested field
function p.debug(frame)
	local args   = core.getArgs(frame)
	local entity = mw.wikibase.getEntity(args.item)
	if not entity then
		return '* no entity found for item ' .. tostring(args.item) .. '\n'
	end
	local data = harvest_wikidata(entity, args.lang)
	-- the format string depends on the citation type, so it has to be set
	-- here as well; debug output should survive a failure in type detection
	local ok, citeType = pcall(p.getCitationType, entity)
	if ok then
		data.type = citeType
	end

	-- sort field names so that the output order is stable
	local names = {}
	for field in pairs( data ) do
		table.insert(names, field)
	end
	table.sort(names)

	local lines = {}
	for _, field in ipairs( names ) do
		local val = data[field]
		local text
		if type(val)=='table' then
			text = table.concat(val, ' / ')
		else
			text = tostring(val)
		end
		table.insert(lines, '*' .. field .. ' = ' .. text .. '\n')
	end
	local formatStr = getFormatString(data.type, args.lang)
	table.insert(lines, '* format string = ' .. formatStr .. ' for language '
		.. tostring(args.lang) .. '\n')
	return table.concat(lines)
end

-- Template entry point: reads "item", "lang" and "page" through core.getArgs
-- and hands them over to p._citeWikidata.
function p.citeWikidata(frame)
	local params = core.getArgs(frame)
	local item, lang, page = params.item, params.lang, params.page
	return p._citeWikidata(item, lang, page)
end

-- Template entry point: build "<li>" entries with one citation per item.
-- INPUT:
--  * frame - frame object; arguments "list" and "lang" are read via core.getArgs.
--            "list" may be a table of item ids or a string of item ids
--            NOTE(review): a comma separated string is assumed - confirm
--            against the calling template
-- OUTPUT:
--  * string with one "<li> ... </li>" per item that produced a citation
function p.reflist(frame)
	local args = core.getArgs(frame)
	local list = args.list
	if type(list) == 'string' then
		list = mw.text.split(list, ',', true)
	elseif type(list) ~= 'table' then
		list = {}
	end
	local items = {}
	for _, id in ipairs(list) do
		id = mw.text.trim(tostring(id))
		if id ~= '' then
			local cite = p._citeWikidata(id, args.lang)
			if cite then -- skip items which could not be loaded
				table.insert(items, '<li> ' .. cite .. '</li>')
			end
		end
	end
	return table.concat(items)
end

return p -- hand the export table to require() / #invoke callers