tokenize.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. 'use strict'
  2. /**
  3. * Cloned from https://github.com/postcss/postcss/blob/master/lib/tokenize.es6
  4. *
  5. */
  // Character codes of every symbol the tokenizer dispatches on. They are
  // computed once at load time so the scanner can compare the result of
  // `charCodeAt` against plain numbers.
  const codeOf = (symbol) => symbol.charCodeAt(0);

  // Quotes, escapes and comment introducers.
  const SINGLE_QUOTE = codeOf("'");
  const DOUBLE_QUOTE = codeOf('"');
  const BACKSLASH = codeOf('\\');
  const SLASH = codeOf('/');
  const ASTERISK = codeOf('*');

  // Whitespace.
  const NEWLINE = codeOf('\n');
  const SPACE = codeOf(' ');
  const FEED = codeOf('\f');
  const TAB = codeOf('\t');
  const CR = codeOf('\r');

  // Brackets.
  const OPEN_SQUARE = codeOf('[');
  const CLOSE_SQUARE = codeOf(']');
  const OPEN_PARENTHESES = codeOf('(');
  const CLOSE_PARENTHESES = codeOf(')');
  const OPEN_CURLY = codeOf('{');
  const CLOSE_CURLY = codeOf('}');

  // Punctuation.
  const SEMICOLON = codeOf(';');
  const COLON = codeOf(':');
  const AT = codeOf('@');

  // Global (stateful) matcher for the first character that ends an at-word;
  // the tokenizer sets `lastIndex` before every use.
  const RE_AT_END = /[ \n\t\r\f\{\(\)'"\\;/\[\]#]/g;
  // Global (stateful) matcher for the first character that ends a plain word,
  // or a `/` that opens a `/*` comment.
  const RE_WORD_END = /[ \n\t\r\f\(\)\{\}:;@!'"\\\]\[#]|\/(?=\*)/g;
  // Detects parenthesised content that must not be collapsed into a single
  // `brackets` token.
  const RE_BAD_BRACKET = /.[\\\/\("'\n]/;
  // A single hexadecimal digit, used when scanning backslash escapes.
  const RE_HEX_ESCAPE = /[a-f0-9]/i;
  29. module.exports = function tokenizer(input, options) {
  30. options = options || {}
  31. let css = input.css.valueOf();
  32. let ignore = options.ignoreErrors;
  33. let code, next, quote, lines, last, content, escape,
  34. nextLine, nextOffset, escaped, escapePos, prev, n, currentToken;
  35. let length = css.length;
  36. let offset = -1;
  37. let line = 1;
  38. let pos = 0;
  39. let buffer = [];
  40. let returned = [];
  41. function unclosed(what) {
  42. throw input.error('Unclosed ' + what, line, pos - offset);
  43. }
  44. function endOfFile() {
  45. return returned.length === 0 && pos >= length;
  46. }
  47. function nextToken() {
  48. if ( returned.length ) return returned.pop();
  49. if ( pos >= length ) return;
  50. code = css.charCodeAt(pos);
  51. if ( code === NEWLINE || code === FEED ||
  52. code === CR && css.charCodeAt(pos + 1) !== NEWLINE ) {
  53. offset = pos;
  54. line += 1;
  55. }
  56. switch ( code ) {
  57. case NEWLINE:
  58. case SPACE:
  59. case TAB:
  60. case CR:
  61. case FEED:
  62. next = pos;
  63. do {
  64. next += 1;
  65. code = css.charCodeAt(next);
  66. if ( code === NEWLINE ) {
  67. offset = next;
  68. line += 1;
  69. }
  70. } while ( code === SPACE ||
  71. code === NEWLINE ||
  72. code === TAB ||
  73. code === CR ||
  74. code === FEED );
  75. currentToken = ['space', css.slice(pos, next)];
  76. pos = next - 1;
  77. break;
  78. case OPEN_SQUARE:
  79. currentToken = ['[', '[', line, pos - offset];
  80. break;
  81. case CLOSE_SQUARE:
  82. currentToken = [']', ']', line, pos - offset];
  83. break;
  84. case OPEN_CURLY:
  85. currentToken = ['{', '{', line, pos - offset];
  86. break;
  87. case CLOSE_CURLY:
  88. currentToken = ['}', '}', line, pos - offset];
  89. break;
  90. case COLON:
  91. currentToken = [':', ':', line, pos - offset];
  92. break;
  93. case SEMICOLON:
  94. currentToken = [';', ';', line, pos - offset];
  95. break;
  96. case OPEN_PARENTHESES:
  97. prev = buffer.length ? buffer.pop()[1] : '';
  98. n = css.charCodeAt(pos + 1);
  99. if ( prev === 'url' &&
  100. n !== SINGLE_QUOTE && n !== DOUBLE_QUOTE &&
  101. n !== SPACE && n !== NEWLINE && n !== TAB &&
  102. n !== FEED && n !== CR ) {
  103. next = pos;
  104. do {
  105. escaped = false;
  106. next = css.indexOf(')', next + 1);
  107. if ( next === -1 ) {
  108. if ( ignore ) {
  109. next = pos;
  110. break;
  111. } else {
  112. unclosed('bracket');
  113. }
  114. }
  115. escapePos = next;
  116. while ( css.charCodeAt(escapePos - 1) === BACKSLASH ) {
  117. escapePos -= 1;
  118. escaped = !escaped;
  119. }
  120. } while ( escaped );
  121. currentToken = ['brackets', css.slice(pos, next + 1),
  122. line, pos - offset,
  123. line, next - offset
  124. ];
  125. pos = next;
  126. } else {
  127. next = css.indexOf(')', pos + 1);
  128. content = css.slice(pos, next + 1);
  129. if ( next === -1 || RE_BAD_BRACKET.test(content) ) {
  130. currentToken = ['(', '(', line, pos - offset];
  131. } else {
  132. currentToken = ['brackets', content,
  133. line, pos - offset,
  134. line, next - offset
  135. ];
  136. pos = next;
  137. }
  138. }
  139. break;
  140. case CLOSE_PARENTHESES:
  141. currentToken = [')', ')', line, pos - offset];
  142. break;
  143. case SINGLE_QUOTE:
  144. case DOUBLE_QUOTE:
  145. quote = code === SINGLE_QUOTE ? '\'' : '"';
  146. next = pos;
  147. do {
  148. escaped = false;
  149. next = css.indexOf(quote, next + 1);
  150. if ( next === -1 ) {
  151. if ( ignore ) {
  152. next = pos + 1;
  153. break;
  154. } else {
  155. unclosed('string');
  156. }
  157. }
  158. escapePos = next;
  159. while ( css.charCodeAt(escapePos - 1) === BACKSLASH ) {
  160. escapePos -= 1;
  161. escaped = !escaped;
  162. }
  163. } while ( escaped );
  164. content = css.slice(pos, next + 1);
  165. lines = content.split('\n');
  166. last = lines.length - 1;
  167. if ( last > 0 ) {
  168. nextLine = line + last;
  169. nextOffset = next - lines[last].length;
  170. } else {
  171. nextLine = line;
  172. nextOffset = offset;
  173. }
  174. currentToken = ['string', css.slice(pos, next + 1),
  175. line, pos - offset,
  176. nextLine, next - nextOffset
  177. ];
  178. offset = nextOffset;
  179. line = nextLine;
  180. pos = next;
  181. break;
  182. case AT:
  183. RE_AT_END.lastIndex = pos + 1;
  184. RE_AT_END.test(css);
  185. if ( RE_AT_END.lastIndex === 0 ) {
  186. next = css.length - 1;
  187. } else {
  188. next = RE_AT_END.lastIndex - 2;
  189. }
  190. currentToken = ['at-word', css.slice(pos, next + 1),
  191. line, pos - offset,
  192. line, next - offset
  193. ];
  194. pos = next;
  195. break;
  196. case BACKSLASH:
  197. next = pos;
  198. escape = true;
  199. while ( css.charCodeAt(next + 1) === BACKSLASH ) {
  200. next += 1;
  201. escape = !escape;
  202. }
  203. code = css.charCodeAt(next + 1);
  204. if ( escape && (code !== SLASH &&
  205. code !== SPACE &&
  206. code !== NEWLINE &&
  207. code !== TAB &&
  208. code !== CR &&
  209. code !== FEED ) ) {
  210. next += 1;
  211. if ( RE_HEX_ESCAPE.test(css.charAt(next)) ) {
  212. while ( RE_HEX_ESCAPE.test(css.charAt(next + 1)) ) {
  213. next += 1;
  214. }
  215. if ( css.charCodeAt(next + 1) === SPACE ) {
  216. next += 1;
  217. }
  218. }
  219. }
  220. currentToken = ['word', css.slice(pos, next + 1),
  221. line, pos - offset,
  222. line, next - offset
  223. ];
  224. pos = next;
  225. break;
  226. default:
  227. if ( code === SLASH && css.charCodeAt(pos + 1) === ASTERISK ) {
  228. next = css.indexOf('*/', pos + 2) + 1;
  229. if ( next === 0 ) {
  230. if ( ignore ) {
  231. next = css.length;
  232. } else {
  233. unclosed('comment');
  234. }
  235. }
  236. content = css.slice(pos, next + 1);
  237. lines = content.split('\n');
  238. last = lines.length - 1;
  239. if ( last > 0 ) {
  240. nextLine = line + last;
  241. nextOffset = next - lines[last].length;
  242. } else {
  243. nextLine = line;
  244. nextOffset = offset;
  245. }
  246. currentToken = ['comment', content,
  247. line, pos - offset,
  248. nextLine, next - nextOffset
  249. ];
  250. offset = nextOffset;
  251. line = nextLine;
  252. pos = next;
  253. } else if ( code === SLASH && css.charCodeAt(pos + 1) === SLASH ) {
  254. next = css.indexOf('\n', pos + 2);
  255. if (next === -1) {
  256. next = css.length - 1
  257. } else {
  258. next = next - 1
  259. }
  260. content = '/*' + css.slice(pos + 2, next + 1);
  261. content = content.replace(/\*\//g, '*\\/') + ' */'
  262. currentToken = ['comment', content,
  263. line, pos - offset,
  264. line, next - offset
  265. ];
  266. pos = next;
  267. } else {
  268. RE_WORD_END.lastIndex = pos + 1;
  269. RE_WORD_END.test(css);
  270. if ( RE_WORD_END.lastIndex === 0 ) {
  271. next = css.length - 1;
  272. } else {
  273. next = RE_WORD_END.lastIndex - 2;
  274. }
  275. currentToken = ['word', css.slice(pos, next + 1),
  276. line, pos - offset,
  277. line, next - offset
  278. ];
  279. buffer.push(currentToken);
  280. pos = next;
  281. }
  282. break;
  283. }
  284. pos++;
  285. return currentToken;
  286. }
  287. function back(token) {
  288. returned.push(token);
  289. }
  290. return {
  291. back,
  292. nextToken,
  293. endOfFile
  294. };
  295. }