Module:Citation/CS1/COinS: Difference between revisions

sync from sandbox;
m (1 revision imported)
en>Trappist the monk
(sync from sandbox;)
Line 1:
 
local coins = {};
 
 
--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
]]
 
local has_accept_as_written, is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities
 
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
 
 
--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------
 
Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.
This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to
markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind.
 
]]
 
local function strip_apostrophe_markup (argument)
	if not is_set (argument) then								-- unset value: hand it back unchanged
		return argument;
	end

	if not argument:find ("''", 1, true) then					-- plain-text search; without a double apostrophe there is no markup to strip
		return argument;
	end

	-- Remove apostrophe runs from longest to shortest: bold italic (5), italic start and end
	-- without content (4), bold (3), italic (2).  A single pass per length is enough: removing
	-- every run of a given length never leaves behind a run that is as long or longer, so a
	-- longer pattern cannot reappear once its pass is done.
	for run_length = 5, 2, -1 do
		local markup = string.rep ("'", run_length);			-- apostrophe is not a Lua pattern magic character, so this is a literal pattern
		argument = argument:gsub (markup, "");					-- keep only the string; gsub's replacement count is discarded
	end

	return argument;											-- any single apostrophes that remain are left in place
end
 
 
Line 52 ⟶ 17:
 
local function make_coins_title (title, script)
title = has_accept_as_written (title);
if is_set (title) then
title = strip_apostrophe_markup (title); -- strip any apostrophe markup
else
title = ''; -- if not set, make sure title is an empty string
end
if is_set (script) then
Line 61 ⟶ 27:
script = strip_apostrophe_markup (script); -- strip any apostrophe markup
else
script = ''; -- if not set, make sure script is an empty string
end
if is_set (title) and is_set (script) then
Line 72 ⟶ 38:
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------
 
Returns a string where all of Lua's magic characters have been escaped. This is important because functions like
string.gsub() treat their pattern and replace strings as patterns, not literal strings.
]]
Line 78 ⟶ 44:
local function escape_lua_magic_chars (argument)
	-- One pass over every Lua pattern magic character ( ^ $ ( ) % . [ ] * + - ? ):
	-- each is replaced by a '%' followed by itself (%0 is the matched character).
	local escaped = argument:gsub ("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0");
	return escaped;												-- only the string; gsub's replacement count is dropped
end
Line 94 ⟶ 60:
while true do
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the urlURL and following space(s): "[url "
if nil == pattern then break; end -- no more urlsURLs
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape luaLua's magic pattern characters
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
end
pages = pages:gsub("[%[%]]", ""); -- remove the brackets
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
pages = pages:gsub("&%w+;", "-" ); -- and replace htmlHTML entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
return pages;
end
Line 114 ⟶ 80:
MathML with SVG or PNG fallback
 
All three are heavy with HTML and CSS which doesn't belong in the metadata.
 
Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
Line 121 ⟶ 87:
This function gets the rendered form of an equation according to the editor's preference before the page is saved. It
then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so
that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation.
 
When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial
value. To replace multiple equations it is necessary to call this function from within a loop.
 
]=]
Line 154 ⟶ 120:
--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------
 
Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities.
 
2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content
Line 171 ⟶ 137:
end
 
value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
Line 177 ⟶ 143:
value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero -width joiner characters from indic script
value = value:gsub ('&zwj;', ''); -- remove &zwj; entities
value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen
end
value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space
return value;
end
Line 214 ⟶ 180:
});
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'journalssrn', 'newsjournal'}) or (in_array (class, {'conferencenews', 'interview', 'map', 'press release', 'webmagazine'}) and is_set(data.Periodical)) or
(in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or
('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then -- set genre according to the type of citation template we are rendering
OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn
elseif 'conference' == class then
OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
Line 231 ⟶ 198:
-- these used only for periodicals
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc.
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
OCinSoutput["rft.issue"] = data.Issue;
OCinSoutput['rft.artnum'] = data.ArticleNumber; -- {{cite journal}} only
OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata
 
Line 254 ⟶ 223:
end
end
else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
OCinSoutput["rft.genre"] = "unknown";
end
Line 270 ⟶ 239:
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
end
-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"
-- and now common parameters (as much as possible)
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
 
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
-- if k == 'ISBN' then v = clean_isbn( v ) end
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end
local id = cfg.id_handlers[k].COinS;
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry
OCinSoutput["rft_id"] = table.concat{ id, "/", v };
elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords
OCinSoutput[ id ] = v;
elseif 'url' == id then -- whenfor urls that are assembled in cfg.id_handlers[k].COinS~/Identifiers; is|asin= notand nil|ol=
OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].prefix, v label}); -- others; provide a url
elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers)
end
end
 
--[[
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
local id, value = cfg.id_handlers[k].COinS;
if k == 'ISBN' then value = clean_isbn( v ); else value = v; end
if string.sub( id or "", 1, 4 ) == 'info' then
OCinSoutput["rft_id"] = table.concat{ id, "/", v };
else
OCinSoutput[ id ] = value;
end
end
]]
local last, first;
for k, v in ipairs( data.Authors ) do
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markersstripmarkers, non-printing or invisible characerscharacters
if k == 1 then -- for the first author name only
if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
Line 313 ⟶ 273:
OCinSoutput["rft.au"] = last; -- book, journal, dissertation
end
-- TODO: At present we do not report "et al.". Add anything special if this condition applies?
end
end
Line 318 ⟶ 279:
OCinSoutput.rft_id = data.URL;
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };
 
OCinSoutput = setmetatable( OCinSoutput, nil );
-- TODO: Add optional extra info:
-- rfr_dat=#REVISION<version> (referrer private data)
-- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
-- ctx_enc=info:ofi/enc:UTF-8 (character encoding)
OCinSoutput = setmetatable( OCinSoutput, nil );
 
-- sort with version string always first, and combine.
-- table.sort( OCinSoutput );
table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
return table.concat(OCinSoutput, "&");
end
Line 336 ⟶ 304:
cfg = cfg_table_ptr;
 
is_sethas_accept_as_written = utilities_page_ptr.is_sethas_accept_as_written; -- import functions from selectselected Module:Citation/CS1/Utilities module
is_set = utilities_page_ptr.is_set;
in_array = utilities_page_ptr.in_array;
remove_wiki_link = utilities_page_ptr.remove_wiki_link;
strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup;
end
 
 
--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------
]]
 
return {
Anonymous user