Replacing accented characters with plain ascii ones [duplicate]
Clash Royale CLAN TAG#URR8PPP
Replacing accented characters with plain ascii ones [duplicate]
This question already has an answer here:
I need to turn a list of last names into alphanumeric usernames, however unfortunately some of them contain non-ascii characters:
Hernández
Quermançós
Migueláñez
Now one way would be just to use a regex to remove any non-alphanumeric characters, such as a.replace(/[^a-z0-9]/gi,'')
. However a more intuitive solution (at least for the user) would be to replace accented characters with their "plain" equivalent, e.g. turn á
, á
into a
, and ç
into c
, etc. Is there an easy way to do this in javascript?
a.replace(/[^a-z0-9]/gi,'')
á
á
a
ç
c
This question has been asked before and already has an answer. If those answers do not fully address your question, please ask a new question.
@Eugene a dictionary Object would be a better choice for a string-to-string mapping
– Paul S.
Aug 8 '13 at 10:24
@PaulS. - what do you mean? I've just googled if there are some standard dictionary objects in javascript with no success. Could you please clarify?
– Alma Do
Aug 8 '13 at 10:28
see this question.
– algoni
Aug 8 '13 at 10:30
@Eugene it's a normal Object which you use as a dictionary (because all keys are strings and properties of objects can be strings), for example the
translate
Object in question linked by algoni, or, closure's answer– Paul S.
Aug 8 '13 at 10:46
translate
3 Answers
3
The correct terminology for such accents is Diacritics. After Googling this term, I found this function which is part of backbone.paginator
. It has a very complete collection of Diacritics and replaces them with their most intuitive ascii character. I found this to be the most complete Javascript solution available today.
backbone.paginator
The full function for future reference:
/**
 * Replaces accented and otherwise decorated Latin letters with plain ASCII
 * equivalents (e.g. "á" -> "a", "ç" -> "c", "Æ" -> "AE").
 *
 * Characters that appear in none of the classes below are left untouched.
 *
 * @param {string} str - Text to transliterate.
 * @returns {string} A copy of `str` with every listed character replaced by
 *     the `base` of the entry that lists it.
 */
function removeDiacritics (str) {
    // Each entry pairs an ASCII replacement (`base`) with a global regex whose
    // character class lists every code point that maps to it. The classes
    // must use real \uXXXX escapes: without the backslash the class would
    // match the literal letter "u" and hex digits instead.
    var defaultDiacriticsRemovalMap = [
        {'base':'A', 'letters':/[\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F]/g},
        {'base':'AA','letters':/[\uA732]/g},
        {'base':'AE','letters':/[\u00C6\u01FC\u01E2]/g},
        {'base':'AO','letters':/[\uA734]/g},
        {'base':'AU','letters':/[\uA736]/g},
        {'base':'AV','letters':/[\uA738\uA73A]/g},
        {'base':'AY','letters':/[\uA73C]/g},
        {'base':'B', 'letters':/[\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181]/g},
        {'base':'C', 'letters':/[\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E]/g},
        {'base':'D', 'letters':/[\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779]/g},
        {'base':'DZ','letters':/[\u01F1\u01C4]/g},
        {'base':'Dz','letters':/[\u01F2\u01C5]/g},
        {'base':'E', 'letters':/[\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E]/g},
        {'base':'F', 'letters':/[\u0046\u24BB\uFF26\u1E1E\u0191\uA77B]/g},
        {'base':'G', 'letters':/[\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E]/g},
        {'base':'H', 'letters':/[\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D]/g},
        {'base':'I', 'letters':/[\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197]/g},
        {'base':'J', 'letters':/[\u004A\u24BF\uFF2A\u0134\u0248]/g},
        {'base':'K', 'letters':/[\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2]/g},
        {'base':'L', 'letters':/[\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780]/g},
        {'base':'LJ','letters':/[\u01C7]/g},
        {'base':'Lj','letters':/[\u01C8]/g},
        {'base':'M', 'letters':/[\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C]/g},
        {'base':'N', 'letters':/[\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4]/g},
        {'base':'NJ','letters':/[\u01CA]/g},
        {'base':'Nj','letters':/[\u01CB]/g},
        {'base':'O', 'letters':/[\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C]/g},
        {'base':'OI','letters':/[\u01A2]/g},
        {'base':'OO','letters':/[\uA74E]/g},
        {'base':'OU','letters':/[\u0222]/g},
        {'base':'P', 'letters':/[\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754]/g},
        {'base':'Q', 'letters':/[\u0051\u24C6\uFF31\uA756\uA758\u024A]/g},
        {'base':'R', 'letters':/[\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782]/g},
        {'base':'S', 'letters':/[\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784]/g},
        {'base':'T', 'letters':/[\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786]/g},
        {'base':'TZ','letters':/[\uA728]/g},
        {'base':'U', 'letters':/[\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244]/g},
        {'base':'V', 'letters':/[\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245]/g},
        {'base':'VY','letters':/[\uA760]/g},
        {'base':'W', 'letters':/[\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72]/g},
        {'base':'X', 'letters':/[\u0058\u24CD\uFF38\u1E8A\u1E8C]/g},
        {'base':'Y', 'letters':/[\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE]/g},
        {'base':'Z', 'letters':/[\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762]/g},
        {'base':'a', 'letters':/[\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250]/g},
        {'base':'aa','letters':/[\uA733]/g},
        {'base':'ae','letters':/[\u00E6\u01FD\u01E3]/g},
        {'base':'ao','letters':/[\uA735]/g},
        {'base':'au','letters':/[\uA737]/g},
        {'base':'av','letters':/[\uA739\uA73B]/g},
        {'base':'ay','letters':/[\uA73D]/g},
        {'base':'b', 'letters':/[\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253]/g},
        {'base':'c', 'letters':/[\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184]/g},
        {'base':'d', 'letters':/[\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A]/g},
        {'base':'dz','letters':/[\u01F3\u01C6]/g},
        {'base':'e', 'letters':/[\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD]/g},
        {'base':'f', 'letters':/[\u0066\u24D5\uFF46\u1E1F\u0192\uA77C]/g},
        {'base':'g', 'letters':/[\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F]/g},
        {'base':'h', 'letters':/[\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265]/g},
        {'base':'hv','letters':/[\u0195]/g},
        {'base':'i', 'letters':/[\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131]/g},
        {'base':'j', 'letters':/[\u006A\u24D9\uFF4A\u0135\u01F0\u0249]/g},
        {'base':'k', 'letters':/[\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3]/g},
        {'base':'l', 'letters':/[\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747]/g},
        {'base':'lj','letters':/[\u01C9]/g},
        {'base':'m', 'letters':/[\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F]/g},
        {'base':'n', 'letters':/[\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5]/g},
        {'base':'nj','letters':/[\u01CC]/g},
        {'base':'o', 'letters':/[\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275]/g},
        {'base':'oi','letters':/[\u01A3]/g},
        {'base':'ou','letters':/[\u0223]/g},
        {'base':'oo','letters':/[\uA74F]/g},
        {'base':'p','letters':/[\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755]/g},
        {'base':'q','letters':/[\u0071\u24E0\uFF51\u024B\uA757\uA759]/g},
        {'base':'r','letters':/[\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783]/g},
        {'base':'s','letters':/[\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B]/g},
        {'base':'t','letters':/[\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787]/g},
        {'base':'tz','letters':/[\uA729]/g},
        {'base':'u','letters':/[\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289]/g},
        {'base':'v','letters':/[\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C]/g},
        {'base':'vy','letters':/[\uA761]/g},
        {'base':'w','letters':/[\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73]/g},
        {'base':'x','letters':/[\u0078\u24E7\uFF58\u1E8B\u1E8D]/g},
        {'base':'y','letters':/[\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF]/g},
        {'base':'z','letters':/[\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763]/g}
    ];

    // Apply every entry in turn; each pass rewrites all characters of one
    // class to that entry's base string.
    for (var i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
        str = str.replace(defaultDiacriticsRemovalMap[i].letters, defaultDiacriticsRemovalMap[i].base);
    }

    return str;
}
I see a lot of possible improvements in this solution. First, I don't understand why all those 'base' and 'letters' properties are needed. Just use the 'base' as the key and the 'letters' as the value. Second, writing all chars by their unicode number seems pretty hard to maintain; would it not be better to just write the real char?
– NLemay
Nov 14 '13 at 16:28
@Jeon I guess backbone found their solution on stackoverflow.com/a/18391901/759452 or even here web.archive.org/web/20120918093154/http://lehelk.com/2011/05/06/…
– Adrien Be
Sep 16 '14 at 14:49
@NLemay To your first point, some people prefer to keep data out of code structure -- there are benefits when iterating and validating structure.
– RobW
Jun 19 at 14:35
Since those characters have no mathematical relation to their 'plain equivalents' in the unicode table you will have to replace them manually using something like this:
/**
 * Turns a string into a plain alphanumeric token: common Latin-1 accented
 * letters are replaced by their unaccented ASCII letter, then everything
 * that is not a-z, A-Z or 0-9 is removed.
 *
 * Note that the final clean-up also removes spaces and punctuation.
 *
 * @param {string} str - Text to clean up.
 * @returns {string} The transliterated string containing only ASCII letters
 *     and digits.
 */
function cleanUpSpecialChars(str) {
    // The "i" flag does not fold accented letters onto each other in these
    // classes, so upper and lower case variants are listed separately.
    return str
        .replace(/[ÀÁÂÃÄÅ]/g,"A")
        .replace(/[àáâãäå]/g,"a")
        .replace(/[ÈÉÊË]/g,"E")
        .replace(/[èéêë]/g,"e")
        .replace(/[ÌÍÎÏ]/g,"I")
        .replace(/[ìíîï]/g,"i")
        .replace(/[ÒÓÔÕÖ]/g,"O")
        .replace(/[òóôõö]/g,"o")
        .replace(/[ÙÚÛÜ]/g,"U")
        .replace(/[ùúûü]/g,"u")
        .replace(/[Ç]/g,"C")
        .replace(/[ç]/g,"c")
        .replace(/[Ñ]/g,"N")
        .replace(/[ñ]/g,"n")
        .replace(/[Ý]/g,"Y")
        .replace(/[ýÿ]/g,"y")
        .replace(/[^a-z0-9]/gi,''); // final clean up
}
The case-insensitive option doesn't work on those characters, so you have to do it for the lower and upper case variants of them.
Clean, simple and nice. For me, this is clearly the best solution.
– NLemay
Nov 14 '13 at 16:15
Not complete but it's totally this one that I take :p
– Thomas Leduc
Sep 24 '14 at 20:54
.replace(/[ÀÁÂÃÄÅ]/g,"A") .replace(/[ÈÉÊË]/g,"E") .replace(/[Î]/g,"I") .replace(/[Ô]/g,"O") .replace(/[Ù]/g,"U") .replace(/[Ç]/g,"C")
– bArraxas
Aug 1 '16 at 13:14
This function also deletes spaces.
– mik3fly-4steri5k
yesterday
Say you have a dictionary like:
// Lookup table from an accented character to its plain ASCII replacement.
// NOTE(review): the two "á" keys look identical here; presumably one was a
// differently-encoded variant of the same letter in the original — verify.
var dict = {"á":"a", "á":"a", "ç":"c"};
then do a function like:
// Look up every character that is neither a word character nor a space in
// `dict`. Characters with no entry are kept as they are; returning the bare
// lookup would insert the text "undefined" for them.
a.replace(/[^\w ]/g, function(char) {
    return dict[char] || char;
});
I think the easiest way is to store an array that represents mapping non-ascii to ascii and then substitute.
– Alma Do
Aug 8 '13 at 10:23