/**
* i18n.js - A simple, flexible JavaScript internationalisation system
*
* Author: Daniel Winterstein
* Version: 0.2.4
* Copyright: Winterwell http://winterwell.com
* Requires: jQuery, and SJTest (optional but recommended) for synchronous ajax loading.
* License: MIT (a commercially friendly open source license)
*/
/**
 * @class I18N
 * A dictionary of translations into one destination language.
 * Creating an I18N also makes it the active instance (the constructor calls this.active(true)).
 *
 * @param lang {string} - Two-character ISO639 language code of the destination language,
 * or a language_region locale code (e.g. "en_US"),
 * or a custom value for special languages (eg 'lolcat', or 'user-defined').
 * If absent and `data` is an app-tag, the browser language is used as a fallback (unless it is 'en').
 *
 * @param data {?string} - Contents of translation csv file for `lang` (any value containing whitespace
 * is treated as file contents),
 * OR a url to load a translation csv file,
 * OR an app-tag (obtained from the i18njs portal; begins with a #) to load from the i18njs portal (if you have an account).
 * Loading is done synchronously (it will block), using jQuery.
 * Errors thrown while loading are logged with console.error and swallowed, so you still get an I18N object.
 *
 * @param appTag {?string} Tag to report translation misses to the i18njs portal (if you have an account).
 * If an appTag is provided for the data parameter (see above), then there is no need to repeat it here.
 * appTags must begin with a #
 *
 * @param local {boolean} Use the local server for this I18N rather than i18n.soda.sh.
 * Defaults to using i18n.soda.sh if absent.
 **/
function I18N(lang, data, appTag, local) {
    this.verbose = false;
    this.version = "0.2.4";
    /** Two-character ISO639 language code of the destination language,
     * or a custom value for special languages (eg 'lolcat', or 'user-defined') */
    this.lang = lang;
    /**
     * {string} Used for reporting untranslatable items.
     * @see I18N.onfail()
     */
    this.appTag = appTag ? appTag : false;
    /** {string} Prepended to portal urls; empty when the local server is used. */
    this.urlPrefix = local ? '' : 'https://i18n.soda.sh';
    /**
     * Record failed translations, so we only report them once.
     * {string: boolean} but set to {string} "!" if it gets too big.
     */
    this.fails = {};
    /**
     * Format dates. By default uses Date.toLocaleString(), which uses the browser's locale setting.
     * Users can replace this with their own function -- or with false to switch off.
     * @param date {Date}
     * @returns {string}
     */
    this.dateFormat = function(date) {
        // TODO Maybe pass a locale in, taken from this.lang? Newer browsers will support it.
        return date.toLocaleString();
    };
    /**
     * Format numbers. By default does nothing.
     * Users can replace this with their own function -- or with false to switch off.
     * @param num {number}
     * @returns {string}
     */
    this.numberFormat = function(num) {
        return num.toString();
    };
    /**
     * {boolean} Is it safe to use this I18N object?
     */
    this.loaded = true; // may be reset to false while a file is being fetched
    /** Dictionary: canonical original -> translation (or a list of alternatives, see add()). */
    this.en2lang = {};
    this.active(true);

    // Load data?
    if ( ! data) {
        return;
    }
    // Is the file more than one word? Then treat it as the input
    if ( ! data.match(/^\S+$/)) {
        this._parseFile(data);
        return;
    }
    // Otherwise treat it as a url.
    // Is it an i18njs app-tag? Then load from the portal
    if (data.charAt(0) === '#') {
        if ( ! this.appTag) this.appTag = data;
        // Guess the language? This isn't reliable but it's a sensible fallback.
        if ( ! this.lang) {
            var _lang = I18N.getBrowserLanguage();
            // But don't guess English, as that's probably the original
            if (_lang !== 'en') this.lang = _lang;
            // Don't load null
            if ( ! this.lang) return;
        }
        // encodeURIComponent rather than the deprecated escape(): escape() leaves "+" unencoded
        // (which a server reads as a space) and does not produce UTF-8 percent-encoding.
        data = this.urlPrefix + '/i18n-trans.csv?tag=' + encodeURIComponent(data)
            + '&lang=' + encodeURIComponent(this.lang);
    }
    try {
        this._loadFile(data);
    } catch(err) {
        /* Swallow file-load errors! That way you still get an I18N object */
        console.error(err);
    }
}
/**
 * Query, set or clear the "active" I18N object.
 * Called automatically when an I18N object is made (so the most recently made is the active one).
 * You can also call it explicitly to swap between objects.
 * @param on {?boolean} Truthy: make this the active object. Any other value except undefined:
 * stop being the active object (if this is it). Omitted: just query.
 * @returns true if this is active
 */
I18N.prototype.active = function(on) {
    /**
     * I18N.active: {I18N} The most recently made (or activated) I18N object.
     * This will be used as a default by the jQuery plugin.
     */
    var isCurrent = (I18N.active === this);
    if (on) {
        I18N.active = this;
    } else if (isCurrent && on !== undefined) {
        I18N.active = null;
    }
    return I18N.active === this;
};
/**
 * Convenient static access to a global I18N: translates with the active I18N object.
 * If there is no active object yet, a blank I18N is made first (making one activates it).
 * @param original {string} Text to translate
 * @returns whatever the active object's tr() returns
 */
I18N.tr = function(original) {
    var current = I18N.active;
    if ( ! current) {
        new I18N(); // the constructor registers the new object as I18N.active
        current = I18N.active;
    }
    return current.tr(original);
};
/**
 * Place-marker prefix. canon() replaces each "variable" it finds with this character followed by
 * the variable's index, and uncanon() swaps those markers back for the variable values.
 */
I18N._MARKERCHAR = "␚";
/**
 * Add a translation to the dictionary.
 *
 * The first translation stored for a canonical key is kept as a plain string. If a different
 * translation is later added for the same key, the entry becomes a list:
 * [firstTranslation, [original, translation, type], ...] which tr() resolves via _tr2_multi().
 *
 * @param original {string}
 * @param translation {string}
 * @param type {?object} Plural or gender for categorise() based advanced multiple-choice translations.
 */
I18N.prototype.add = function(original, translation, type) {
    // TODO unescape tab, \r\n and #?
    var vars = [];
    var key = this.canon(original, vars);
    // Markers in the translation are numbered to match the variable order of the original.
    var meaning = this.canon(translation, vars, true);
    // Own-property lookup only: a key such as "constructor" must not pick up an inherited
    // Object.prototype member (which used to crash on old.push below).
    var old = Object.prototype.hasOwnProperty.call(this.en2lang, key) ? this.en2lang[key] : undefined;
    // Check for multiple translations, keep multiple translations
    if (old && old !== meaning) {
        if (typeof(old) === 'string') {
            old = [old];
        }
        old.push([original, meaning, type]);
        this.en2lang[key] = old;
    } else { // normal case
        this.en2lang[key] = meaning;
    }
};
/**
 * Read a translation file into the dictionary, one add() call per usable row.
 * @param file {string} csv text, tab separated, # to comment out lines
 * 1st-column: original, 2nd-column: Translation, 3rd or more: ignored (can use for comments)
 * @private
 */
I18N.prototype._parseFile = function (file) {
    var dict = this;
    file.split(/[\r\n]/).forEach(function(row) {
        // Blank rows and #-comments carry no translation
        var skip = ! row || row.charAt(0) == '#';
        if (skip) return;
        var cells = row.split("\t");
        // Need both an original and a translation; any further cells are just comments
        if (cells.length >= 2) {
            dict.add(cells[0], cells[1]);
        }
    });
    console.log("I18N", "loaded", this);
};
/**
 * Do a synchronous load of a csv file, and parse it into the dictionary.
 * this.loaded is false while the request is in flight, and true again once it has
 * finished -- whether or not it succeeded.
 * @param data {string} The url
 */
I18N.prototype._loadFile = function(data) {
    var self = this;
    var onFetched = function(result) {
        self._parseFile(result);
    };
    var onSettled = function() {
        self.loaded = true;
    };
    self.file = data;
    // Blocking request; let the browser cache the file.
    var options = { async: false, cache: true };
    // Fetch it
    self.loaded = false;
    $.ajax(data, options).done(onFetched).always(onSettled);
};
/**
 * Translate a piece of text.
 * If there is no translation, the failure is reported via onfail() (only once this object is
 * loaded and has a language), and the original is returned, run through canon()/uncanon()
 * (so {}s and (s) are resolved).
 * @param english {string} Original text (often English)
 * @returns {string} the translation, or the tidied original if there is none
 */
I18N.prototype.tr = function (english) {
    var vars = [];
    var key = this.canon(english, vars);
    // Own-property lookup only: text such as "constructor" or "__defineGetter__" must not
    // resolve to an inherited Object.prototype member and get treated as a translation list.
    var trans = Object.prototype.hasOwnProperty.call(this.en2lang, key) ? this.en2lang[key] : undefined;
    // multiple translations?
    if (trans && typeof(trans) !== 'string') {
        trans = this._tr2_multi(english, vars, trans);
    }
    if (trans) {
        return this.uncanon(trans, vars);
    }
    // fail -- Log it to the backend for translators to work on
    if (this.loaded && english && this.lang) {
        this.onfail(english, this.lang, key);
    }
    // Remove {}s and (s)
    return this.uncanon(key, vars);
};
/**
 * Choose between several translations stored for the same canonical key.
 * Order of preference: (1) an alternative whose raw original is exactly `english`;
 * (2) the first alternative whose type matches the categories that categorise() reports
 * for the variables; (3) the first translation that was added.
 * @param english {string} Raw-form to translate
 * @param vars {array} From canon()
 * @param trans {array} As built by add(): [firstTranslation, [original, translation, type], ...]
 * @returns {string} translation to use
 * @private
 */
I18N.prototype._tr2_multi = function(english, vars, trans) {
    var j, prop;
    // exact match?
    for (j = 1; j < trans.length; j++) {
        if (english === trans[j][0]) {
            return trans[j][1];
        }
    }
    // typed match? Merge whatever categorise() says about each variable.
    var category = {};
    var hasCategory = false;
    for (var vi = 0; vi < vars.length; vi++) {
        var cati = this.categorise(vars[vi]);
        if ( ! cati) continue;
        for (prop in cati) {
            category[prop] = cati[prop];
            hasCategory = true;
        }
    }
    // Nothing to match on (NB: comparing against a fresh {} can never be true, so track it explicitly)
    if ( ! hasCategory) {
        return trans[0];
    }
    for (j = 1; j < trans.length; j++) {
        var catj = trans[j][2];
        if ( ! catj) continue;
        // Every property of the alternative's type must agree with the variables' category
        var ok = true;
        for (prop in catj) {
            if (category[prop] !== catj[prop]) {
                ok = false;
                break;
            }
        }
        if (ok) return trans[j][1];
    }
    // just use the first
    return trans[0];
};
/**
 * Is this plural or singular? Male or female?
 * Hook for typed (plural / gender) translations: _tr2_multi() calls this for each variable,
 * merges the properties of every truthy result into one category object, and compares that
 * with the `type` of each alternative given to add().
 * This default implementation recognises nothing and always returns false.
 * @param {string} v - Variable value (probably a word or a number) to analyse;
 * @returns {object|boolean} an object of category properties, or false if there is nothing to say
 */
I18N.prototype.categorise = function(v) {
return false;
};
/**
 * Called when we can't translate a phrase.
 * The default version is for a SoDash backend -- replace it with your own logging call!
 * Note: This will skip repeats, markup with no text in it, too-long texts (max:1000 characters),
 * and it stops logging after 1000 fails.
 * Nothing is sent unless this.appTag is set.
 * @param english {string} The original text.
 * @param lang {string} The language we're translating to.
 * @param key {string} The internal lookup key, as produced by canon(). Useful if debugging corner cases.
 */
I18N.prototype.onfail = function(english, lang, key) {
    // Logging was switched off after too many fails
    if (this.fails === "!") return;
    // ignore empty tags
    try {
        // NB: $.parseHTML returns a plain array of DOM nodes (or nothing), so wrap it to read the text
        var nodes = $.parseHTML(english);
        if (nodes && nodes.length > 0 && ! $(nodes).text()) {
            return;
        }
    } catch(ohwell) {
        // Best effort only: if the text can't be parsed, carry on and log it
    }
    // Don't log giant blocks of text. Test on key, to be lenient towards tags (which can get bloated).
    if (key.length > 1000) return;
    // Only log a fail once! (own keys only, so that e.g. "constructor" is not mistaken for a repeat)
    if (Object.prototype.hasOwnProperty.call(this.fails, key)) return;
    // Too many fails for one page to log?
    var size = Object.keys(this.fails).length;
    if (size > 1000) {
        console.warn("I18N", "Switching off fail logging ("+lang+")");
        this.fails = "!";
        return;
    }
    // Mark it as logged.
    this.fails[key] = true;
    if (this.verbose) console.warn("I18N", "fail ("+lang+"): "+english+" (internal key: "+key+")");
    if ( ! this.appTag) return;
    // canon the whitespace (but not variables, etc)
    english = english.replace(/\s+/g, ' ');
    // Send a cross-domain ping
    $.ajax({
        url: this.urlPrefix + '/lg.json',
        dataType: 'jsonp',
        data: {
            tag: this.appTag,
            msg: lang+"\t"+english
        }
    });
};
/**
 * A number: digits and commas, with an optional decimal part.
 * NOTE(review): because the integer part is [0-9,]+ a lone "," also matches -- confirm whether that is intended.
 */
I18N.NUMBER = /[0-9,]+(\.\d+)?/g;
/**
 * numbers, {wrapped} text, emails, html tags -- keep them untranslated.
 * Assembled from regex literals so that escapes such as \b and \S reach the RegExp intact:
 * inside a plain string literal "\b" is a backspace character and "\S" is just "S",
 * which meant that the email pattern could never match.
 */
I18N.KEEPME = new RegExp([
    I18N.NUMBER.source,
    /\{.*?\}/.source,
    /\b\S+@[a-zA-Z\.]+/.source,
    /<\/?[a-z][a-zA-Z0-9]*[^>]*?>/.source
].join('|'), 'g');
/**
 * Convert into a canonical form for internal lookup: "variables" (numbers, {wrapped} text, emails,
 * html tags -- whatever I18N.KEEPME matches) become numbered place-markers, and each run of
 * whitespace becomes a single " ".
 * @param english {string} The text to convert. Falsy values are returned as they are.
 * @param varCatcher {array}, which will collect the raw versions of "variables", for uncanon to put back.
 * OR (if varOrder is true) the array filled by a previous canon(original), used to establish
 * place-marker ordering in canon(translation).
 * @param varOrder {?boolean} If true, varCatcher is read rather than filled: a variable gets the
 * marker of its first position in varCatcher, and a variable which is not in there is left as it is.
 * @returns The "canonical" form -- with variable markers, standardised whitespace, etc.
 */
I18N.prototype.canon = function (english, varCatcher, varOrder) {
    if ( ! english) return english;
    var caught = (varCatcher === undefined) ? [] : varCatcher;
    // Replace untranslated stuff with markers: numbers, {wrapped}, emails, html tags
    var toMarker = function(fragment) {
        var slot;
        if (varOrder) {
            // Which marker did the original give this variable?
            slot = caught.indexOf(fragment);
            if (slot === -1) {
                return fragment; // A new var-like thing. Leave it alone.
            }
        } else {
            slot = caught.length;
            caught.push(fragment);
        }
        return I18N._MARKERCHAR + slot; // Mark the place
    };
    // standardise whitespace as " "
    // TODO trim -- but we should preserve leading/trailing whitespace to avoid wordsbeingstucktogether.
    return english.replace(I18N.KEEPME, toMarker).replace(/\s+/g, ' ');
};
/**
 * Inverse of canon. Sort of.
 * Resolves (s) plural endings, then swaps every place-marker for its (formatted) variable.
 * @param canon The output from canon
 * @param vars The varCatcher array from canon.
 * @returns {string} the text with the variables put back. Falsy input is returned as it is.
 */
I18N.prototype.uncanon = function (canon, vars) {
    if ( ! canon) return canon;
    var uncanon = canon;
    // (s) -- done before vars are put in, as they shouldnt be edited.
    if (vars.length != 0) uncanon = this._uncanon2_pluralise(canon, vars);
    // vars -- all markers are swapped in one pass, with a replacer function, so that:
    //  - a marker which a translation uses more than once is replaced every time,
    //  - "$" sequences inside a variable are not interpreted as replacement patterns,
    //  - marker 1 can never eat the front of marker 10,
    //  - text put in for one variable is never re-scanned for markers.
    var self = this;
    var markers = new RegExp(I18N._MARKERCHAR + '(\\d+)', 'g');
    return uncanon.replace(markers, function(marker, digits) {
        // Digits straight after a marker may be literal text (e.g. the translation "{a}5"),
        // so use the longest run of digits which is a valid index, and keep the rest as text.
        for (var len = digits.length; len > 0; len--) {
            var vi = Number(digits.substring(0, len));
            // Markers are never written with leading zeros
            if (String(vi).length !== len || vi >= vars.length) continue;
            // Convert Dates and numbers, and insert the value back into the string
            return self._uncanon2_convert(vars[vi]) + digits.substring(len);
        }
        return marker; // not one of our variables: leave it alone
    });
};
/**
 * ISO-8601 calendar date, with an optional time part and an optional zone, e.g. "2020-01-05" or
 * "2020-01-05T10:30:00Z". Deliberately strict: the general-purpose Date parser is far too
 * forgiving to be let loose on arbitrary variable text.
 * @private
 */
I18N._ISO_DATE = /^(\d{4})-(\d{2})-(\d{2})(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?(?:Z|[+-]\d{2}:\d{2})?)?$/;
/**
 * @param text {string}
 * @returns {?Date} the date, if text is a valid ISO-8601 date / date-time. Otherwise null.
 * A date without a time part is taken to be a local day (not UTC midnight), so that it is not
 * shown as the day before in timezones west of Greenwich.
 * @private
 */
I18N._parseIsoDate = function(text) {
    var m = I18N._ISO_DATE.exec(text);
    if ( ! m) return null;
    var d;
    if (m[0].length === 10) {
        var year = Number(m[1]), month = Number(m[2]) - 1, day = Number(m[3]);
        d = new Date(year, month, day);
        // Reject impossible dates like 2020-13-45, which Date would silently roll over
        if (d.getFullYear() !== year || d.getMonth() !== month || d.getDate() !== day) return null;
        return d;
    }
    d = new Date(text);
    return isNaN(d.getTime()) ? null : d;
};
/**
 * Prepare a variable for insertion into the output text.
 * - Numbers are run through numberFormat() (if set). NB: the raw text is passed in, so that the
 *   default formatter really does leave e.g. "1.0" or "007" untouched.
 * - ISO-8601 dates, e.g. {2020-01-05}, are run through dateFormat() (if set).
 *   (The previous test used Date(v) without `new`, which ignores v and returns a string, so
 *   dateFormat() was never called.)
 * - Anything else just loses its wrapping {}s, if it has them.
 * @param v {string}
 * @returns formatted version of v, e.g. numbers are run through numberFormat()
 */
I18N.prototype._uncanon2_convert = function(v) {
    // TODO Maybe move the is number/date tests into key-storage (using different marker-chars), for some repeated-use efficiency.
    // ...Is it a number?
    if (this.numberFormat) {
        var n = Number(v);
        if ( ! isNaN(n)) return this.numberFormat(v);
    }
    // Remove wrapping {}s if present
    var inner = v;
    if (v.length > 1 && v.charAt(0) == '{' && v.charAt(v.length-1) == '}') {
        inner = v.substring(1, v.length-1);
    }
    // ...Is it a date?
    if (this.dateFormat) {
        var d = I18N._parseIsoDate(inner);
        if (d) return this.dateFormat(d);
    }
    return inner;
};
/**
 * Convert (s) endings into s or "".
 * The first variable which is a number decides: 1 is singular, any other number is plural.
 * If no variable is a number, the text is returned unchanged.
 * Only a variable which is a number as a whole counts (optionally {wrapped}, and optionally with
 * thousands-commas) -- so a stray "," or a tag like <h2> is not mistaken for a number.
 * @param text {string} e.g. "$0 monkey(s)"
 * @param vars Placeholder values, e.g. [2]
 * @returns {string} e.g. "2 monkeys"
 * @private
 */
I18N.prototype._uncanon2_pluralise = function(text, vars) {
    // ??we'd get a small efficiency boost if we cached whether a key requires plural handling
    var isPlural = null;
    for (var vi = 0; vi < vars.length; vi++) {
        var vs = '' + vars[vi];
        // Look inside wrapping {}s
        if (vs.length > 1 && vs.charAt(0) === '{' && vs.charAt(vs.length-1) === '}') {
            vs = vs.substring(1, vs.length-1);
        }
        // Plain digits, or digits in comma-separated groups of three; optional decimal part
        if ( ! /^(\d+|\d{1,3}(,\d{3})+)(\.\d+)?$/.test(vs)) continue;
        // Compare by value, so "1", "1.0" and "1.00" are all singular
        isPlural = Number(vs.replace(/,/g, '')) !== 1;
        break;
    }
    if (isPlural === true) {
        // Get the correction from the translation ??should we use a more defensive regex??
        text = text.replace(/(\w)\((\w{1,3})\)/g, '$1$2');
    } else if (isPlural === false) {
        text = text.replace(/(\w)\(\w{1,3}\)/g, '$1');
    }
    return text;
};
/**
 * Try to guess the user's language from the browser.
 * Safe to call where there is no `navigator` at all (e.g. when loaded as a CommonJS module).
 * @returns language code (which could be incorrect), or null
 */
I18N.getBrowserLanguage = function() {
    // NB: a bare reference to an undeclared `navigator` would throw, hence the typeof test
    if (typeof navigator === 'undefined' || ! navigator) return null;
    var locale = navigator.language || navigator.userLanguage;
    if ( ! locale) return null;
    // chop down "en-GB" to just "en"
    return locale.substring(0, 2);
};
/**
 * Find out if there's a translation available for this string
 * @param english {string} Original text
 * @returns {boolean} true if the dictionary holds a (non-empty) translation for it
 */
I18N.prototype.canTranslate = function(english) {
    var key = this.canon(english, []);
    // Own entries only: "constructor", "toString" etc. are not translations
    if ( ! Object.prototype.hasOwnProperty.call(this.en2lang, key)) return false;
    return !! this.en2lang[key];
};
// CommonJS module exports: when an `exports` object is in scope, publish the I18N constructor on it.
if (typeof exports !== 'undefined') exports.I18N = I18N;
/* jQuery plugin
 * Define $().tr(), which applies translation from the most recent I18N object.
 * Skipped quietly when jQuery is not present (e.g. when loaded as a CommonJS module). */
(function (jQuery) {
    if ( ! jQuery) return;
    /**
     * Translate the element(s): each element's html is replaced by its translation.
     * NB: the translation is inserted as html, so the translation data must be trusted.
     * @param i18n {?I18N} If unset, use the latest made/active one, or make a new one.
     * @returns the jQuery object, for chaining */
    jQuery.fn.tr = function(i18n) {
        if ( ! i18n) i18n = I18N.active || new I18N();
        return this.each(function() {
            var $el = jQuery(this);
            // Store the raw version (in case we switch languages later)
            var raw = $el.data('i18n-raw');
            if ( ! raw) {
                raw = $el.html();
                $el.data('i18n-raw', raw);
            }
            var trans = i18n.tr(raw);
            $el.html(trans);
        });
    };
// NB: typeof tests, because a bare reference to an undeclared jQuery / $ throws a ReferenceError
}((typeof jQuery !== 'undefined' && jQuery) || (typeof $ !== 'undefined' && $)));