Module:Citation/CS1/COinS: Difference between revisions

Line 1:

local coins = {};

--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------

Names declared here are file-scope locals that start out nil; further down in this file they are assigned from
utilities_page_ptr and cfg_table_ptr.  Each name is declared exactly once so that no later declaration shadows
an earlier one.

]]

local has_accept_as_written, is_set, in_array, remove_wiki_link, strip_apostrophe_markup;	-- functions in Module:Citation/CS1/Utilities

local cfg;	-- table of configuration tables that are defined in Module:Citation/CS1/Configuration

⚫

--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------

Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.

This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to

markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind.

⚫

]]

-- Remove runs of two to five apostrophes (wiki italic / bold markup) from argument.
--   argument: string to clean; returned unchanged when is_set() reports it as not set or when it holds no ''
--   returns: argument with the apostrophe runs removed
local function strip_apostrophe_markup (argument)
	if not is_set (argument) then return argument; end

	if not argument:find ("''", 1, true) then	-- plain-text search: is there at least one double apostrophe? If not, exit.
		return argument;
	end

	while true do	-- longer runs are tested before shorter ones; repeat until no run of two or more remains
		if argument:find ("'''''", 1, true) then	-- bold italic (5)
			argument = argument:gsub ("%'%'%'%'%'", "");	-- remove all instances of it
		elseif argument:find ("''''", 1, true) then	-- italic start and end without content (4)
			argument = argument:gsub ("%'%'%'%'", "");
		elseif argument:find ("'''", 1, true) then	-- bold (3)
			argument = argument:gsub ("%'%'%'", "");
		elseif argument:find ("''", 1, true) then	-- italic (2)
			argument = argument:gsub ("%'%'", "");
		else
			break;	-- nothing left to strip
		end
	end	-- closes the while loop, so the return below runs only after every pass has finished

	return argument;	-- done
end

Line 52:

Line 17:

local function make_coins_title (title, script)

title = has_accept_as_written (title);

if is_set (title) then

title = strip_apostrophe_markup (title); -- strip any apostrophe markup

else

title=''; -- if not set, make sure title is an empty string

title = ''; -- if not set, make sure title is an empty string

end

if is_set (script) then

Line 61:

Line 27:

script = strip_apostrophe_markup (script); -- strip any apostrophe markup

else

script=''; -- if not set, make sure script is an empty string

script = ''; -- if not set, make sure script is an empty string

end

if is_set (title) and is_set (script) then

Line 72:

Line 38:

--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Returns a string where all of ~~lua~~'s magic characters have been escaped. This is important because functions like

Returns a string where all of Lua's magic characters have been escaped. This is important because functions like

string.gsub() treat their pattern and replace strings as patterns, not literal strings.

]]

Line 78:

Line 44:

-- Escape every Lua pattern magic character in argument so the result can be used as a literal pattern.
--   argument: string that will later be handed to a pattern function such as string.gsub()
--   returns: a single string in which each of ^ $ ( ) % . [ ] * + - ? is preceded by %
local function escape_lua_magic_chars (argument)
	argument = argument:gsub ("%%", "%%%%");	-- replace % with %%; done first so the % added below is not escaped again
	argument = argument:gsub ("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1");	-- replace all other Lua magic pattern characters; must run exactly once, a second pass would escape the same characters again
	return argument;
end

Line 94:

Line 60:

while true do

pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the ~~url~~ and following space(s): "[url "

pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the URL and following space(s): "[url "

if nil == pattern then break; end -- no more ~~urls~~

if nil == pattern then break; end -- no more URLs

pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape ~~lua~~'s magic pattern characters

pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape Lua's magic pattern characters

pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible

end

pages = pages:gsub("[%[%]]", ""); -- remove the brackets

pages = pages:gsub("–", "-" ); -- replace endashes with hyphens

pages = pages:gsub("&%w+;", "-" ); -- and replace ~~html~~ entities (– etc.) with hyphens; do we need to replace numerical entities like   and the like?

pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (– etc.) with hyphens; do we need to replace numerical entities like   and the like?

return pages;

end

Line 114:

Line 80:

MathML with SVG or PNG fallback

All three are heavy with ~~html~~ and ~~css~~ which doesn't belong in the metadata.

All three are heavy with HTML and CSS which doesn't belong in the metadata.

Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings

Line 121:

Line 87:

This function gets the rendered form of an equation according to the editor's preference before the page is saved. It

then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so

that the page is saved without extraneous ~~html~~/~~css~~ markup and with a reasonably readable text form of the equation.

that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation.

When a replacement is made, this function returns true and the value with replacement; otherwise false and the ~~intital~~

When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial

value. To replace multipe equations it is ~~necesary~~ to call this function from within a loop.

value. To replace multiple equations it is necessary to call this function from within a loop.

]=]

Line 154:

Line 120:

--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------

Cleanup parameter values for the metadata by removing or replacing invisible characters and certain ~~html~~ entities.

Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities.

2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content

Line 171:

Line 137:

end

value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message

value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content

Line 177:

Line 143:

value = value:gsub (' ', ' '); -- replace   entity with plain space

value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space

if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero width joiner characters from indic script

if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script

value = value:gsub ('&zwj;', ''); -- remove &zwj; entities

value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen

end

value = value:gsub ('[\009\010\013]', ' '); -- replace horizontal tab, line feed, carriage return with plain space

value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space

return value;

end

Line 214:

Line 180:

});

if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', '~~journal~~', '~~news~~'~~}) or (in_array (class~~, {'~~conference~~', '~~interview', 'map', 'press release', 'web~~'}~~) and is_set(data.Periodical)~~) or

if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn', 'journal', 'news', 'magazine'}) or

(in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or

('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then

OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier

if in_array (class, {'arxiv', 'biorxiv', 'citeseerx'}) then -- set genre according to the type of citation template we are rendering

if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'ssrn'}) then -- set genre according to the type of citation template we are rendering

OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx

OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite ssrn

elseif 'conference' == class then

OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)

Line 231:

Line 198:

-- these used only for periodicals

OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall

OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc.

OCinSoutput["rft.chron"] = data.Chron; -- free-form date components

OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books

OCinSoutput["rft.issue"] = data.Issue;

OCinSoutput['rft.artnum'] = data.ArticleNumber; -- {{cite journal}} only

OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata

Line 254:

Line 223:

end

else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}

else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}

OCinSoutput["rft.genre"] = "unknown";

end

Line 270:

Line 239:

OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation

end

-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"

-- and now common parameters (as much as possible)

OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation

for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?

-- if k == 'ISBN' then v = clean_isbn( v ) end

if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end

local id = cfg.id_handlers[k].COinS;

if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry

OCinSoutput["rft_id"] = table.concat{ id, "/", v };

elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords

elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords

OCinSoutput[ id ] = v;

elseif id then -- ~~when~~ ~~cfg.id_handlers[k].COinS~~ is ~~not~~ ~~nil~~

elseif 'url' == id then -- for urls that are assembled in ~/Identifiers; |asin= and |ol=

OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].~~prefix, v~~ }; ~~-- others; provide a url~~

OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label});

elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here

OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers)

end

--[[

for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?

local id, value = cfg.id_handlers[k].COinS;

if k == 'ISBN' then value = clean_isbn( v ); else value = v; end

if string.sub( id or "", 1, 4 ) == 'info' then

OCinSoutput["rft_id"] = table.concat{ id, "/", v };

else

OCinSoutput[ id ] = value;

end

]]

local last, first;

for k, v in ipairs( data.Authors ) do

last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki ~~strip markers~~, non-printing or invisible ~~characers~~

last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki stripmarkers, non-printing or invisible characters

if k == 1 then -- for the first author name only

if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name

OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation

OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation

Line 313:

Line 273:

OCinSoutput["rft.au"] = last; -- book, journal, dissertation

end

-- TODO: At present we do not report "et al.". Add anything special if this condition applies?

end

Line 318:

Line 279:

OCinSoutput.rft_id = data.URL;

OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };

⚫

OCinSoutput = setmetatable( OCinSoutput, nil );

-- TODO: Add optional extra info:

-- rfr_dat=#REVISION<version> (referrer private data)

-- ctx_id=<data.RawPage>#<ref> (identifier for the context object)

-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)

-- ctx_enc=info:ofi/enc:UTF-8 (character encoding)

⚫

OCinSoutput = setmetatable( OCinSoutput, nil );

-- sort with version string always first, and combine.

--table.sort( OCinSoutput );

-- table.sort( OCinSoutput );

table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"

return table.concat(OCinSoutput, "&");

end

Line 336:

Line 304:

cfg = cfg_table_ptr;

~~is_set~~ = utilities_page_ptr.~~is_set~~; -- import functions from ~~select~~ Module:Citation/CS1/Utilities module

has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from selected Module:Citation/CS1/Utilities module

is_set = utilities_page_ptr.is_set;

in_array = utilities_page_ptr.in_array;

remove_wiki_link = utilities_page_ptr.remove_wiki_link;

strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup;

end

⚫

--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------

⚫

]]

return {