// tokenize.js (6.4 KB)
  1. 'use strict'
  2. const SINGLE_QUOTE = "'".charCodeAt(0)
  3. const DOUBLE_QUOTE = '"'.charCodeAt(0)
  4. const BACKSLASH = '\\'.charCodeAt(0)
  5. const SLASH = '/'.charCodeAt(0)
  6. const NEWLINE = '\n'.charCodeAt(0)
  7. const SPACE = ' '.charCodeAt(0)
  8. const FEED = '\f'.charCodeAt(0)
  9. const TAB = '\t'.charCodeAt(0)
  10. const CR = '\r'.charCodeAt(0)
  11. const OPEN_SQUARE = '['.charCodeAt(0)
  12. const CLOSE_SQUARE = ']'.charCodeAt(0)
  13. const OPEN_PARENTHESES = '('.charCodeAt(0)
  14. const CLOSE_PARENTHESES = ')'.charCodeAt(0)
  15. const OPEN_CURLY = '{'.charCodeAt(0)
  16. const CLOSE_CURLY = '}'.charCodeAt(0)
  17. const SEMICOLON = ';'.charCodeAt(0)
  18. const ASTERISK = '*'.charCodeAt(0)
  19. const COLON = ':'.charCodeAt(0)
  20. const AT = '@'.charCodeAt(0)
  21. const RE_AT_END = /[\t\n\f\r "#'()/;[\\\]{}]/g
  22. const RE_WORD_END = /[\t\n\f\r !"#'():;@[\\\]{}]|\/(?=\*)/g
  23. const RE_BAD_BRACKET = /.[\n"'(/\\]/
  24. const RE_HEX_ESCAPE = /[\da-f]/i
  25. module.exports = function tokenizer(input, options = {}) {
  26. let css = input.css.valueOf()
  27. let ignore = options.ignoreErrors
  28. let code, next, quote, content, escape
  29. let escaped, escapePos, prev, n, currentToken
  30. let length = css.length
  31. let pos = 0
  32. let buffer = []
  33. let returned = []
  34. function position() {
  35. return pos
  36. }
  37. function unclosed(what) {
  38. throw input.error('Unclosed ' + what, pos)
  39. }
  40. function endOfFile() {
  41. return returned.length === 0 && pos >= length
  42. }
  43. function nextToken(opts) {
  44. if (returned.length) return returned.pop()
  45. if (pos >= length) return
  46. let ignoreUnclosed = opts ? opts.ignoreUnclosed : false
  47. code = css.charCodeAt(pos)
  48. switch (code) {
  49. case NEWLINE:
  50. case SPACE:
  51. case TAB:
  52. case CR:
  53. case FEED: {
  54. next = pos
  55. do {
  56. next += 1
  57. code = css.charCodeAt(next)
  58. } while (
  59. code === SPACE ||
  60. code === NEWLINE ||
  61. code === TAB ||
  62. code === CR ||
  63. code === FEED
  64. )
  65. currentToken = ['space', css.slice(pos, next)]
  66. pos = next - 1
  67. break
  68. }
  69. case OPEN_SQUARE:
  70. case CLOSE_SQUARE:
  71. case OPEN_CURLY:
  72. case CLOSE_CURLY:
  73. case COLON:
  74. case SEMICOLON:
  75. case CLOSE_PARENTHESES: {
  76. let controlChar = String.fromCharCode(code)
  77. currentToken = [controlChar, controlChar, pos]
  78. break
  79. }
  80. case OPEN_PARENTHESES: {
  81. prev = buffer.length ? buffer.pop()[1] : ''
  82. n = css.charCodeAt(pos + 1)
  83. if (
  84. prev === 'url' &&
  85. n !== SINGLE_QUOTE &&
  86. n !== DOUBLE_QUOTE &&
  87. n !== SPACE &&
  88. n !== NEWLINE &&
  89. n !== TAB &&
  90. n !== FEED &&
  91. n !== CR
  92. ) {
  93. next = pos
  94. do {
  95. escaped = false
  96. next = css.indexOf(')', next + 1)
  97. if (next === -1) {
  98. if (ignore || ignoreUnclosed) {
  99. next = pos
  100. break
  101. } else {
  102. unclosed('bracket')
  103. }
  104. }
  105. escapePos = next
  106. while (css.charCodeAt(escapePos - 1) === BACKSLASH) {
  107. escapePos -= 1
  108. escaped = !escaped
  109. }
  110. } while (escaped)
  111. currentToken = ['brackets', css.slice(pos, next + 1), pos, next]
  112. pos = next
  113. } else {
  114. next = css.indexOf(')', pos + 1)
  115. content = css.slice(pos, next + 1)
  116. if (next === -1 || RE_BAD_BRACKET.test(content)) {
  117. currentToken = ['(', '(', pos]
  118. } else {
  119. currentToken = ['brackets', content, pos, next]
  120. pos = next
  121. }
  122. }
  123. break
  124. }
  125. case SINGLE_QUOTE:
  126. case DOUBLE_QUOTE: {
  127. quote = code === SINGLE_QUOTE ? "'" : '"'
  128. next = pos
  129. do {
  130. escaped = false
  131. next = css.indexOf(quote, next + 1)
  132. if (next === -1) {
  133. if (ignore || ignoreUnclosed) {
  134. next = pos + 1
  135. break
  136. } else {
  137. unclosed('string')
  138. }
  139. }
  140. escapePos = next
  141. while (css.charCodeAt(escapePos - 1) === BACKSLASH) {
  142. escapePos -= 1
  143. escaped = !escaped
  144. }
  145. } while (escaped)
  146. currentToken = ['string', css.slice(pos, next + 1), pos, next]
  147. pos = next
  148. break
  149. }
  150. case AT: {
  151. RE_AT_END.lastIndex = pos + 1
  152. RE_AT_END.test(css)
  153. if (RE_AT_END.lastIndex === 0) {
  154. next = css.length - 1
  155. } else {
  156. next = RE_AT_END.lastIndex - 2
  157. }
  158. currentToken = ['at-word', css.slice(pos, next + 1), pos, next]
  159. pos = next
  160. break
  161. }
  162. case BACKSLASH: {
  163. next = pos
  164. escape = true
  165. while (css.charCodeAt(next + 1) === BACKSLASH) {
  166. next += 1
  167. escape = !escape
  168. }
  169. code = css.charCodeAt(next + 1)
  170. if (
  171. escape &&
  172. code !== SLASH &&
  173. code !== SPACE &&
  174. code !== NEWLINE &&
  175. code !== TAB &&
  176. code !== CR &&
  177. code !== FEED
  178. ) {
  179. next += 1
  180. if (RE_HEX_ESCAPE.test(css.charAt(next))) {
  181. while (RE_HEX_ESCAPE.test(css.charAt(next + 1))) {
  182. next += 1
  183. }
  184. if (css.charCodeAt(next + 1) === SPACE) {
  185. next += 1
  186. }
  187. }
  188. }
  189. currentToken = ['word', css.slice(pos, next + 1), pos, next]
  190. pos = next
  191. break
  192. }
  193. default: {
  194. if (code === SLASH && css.charCodeAt(pos + 1) === ASTERISK) {
  195. next = css.indexOf('*/', pos + 2) + 1
  196. if (next === 0) {
  197. if (ignore || ignoreUnclosed) {
  198. next = css.length
  199. } else {
  200. unclosed('comment')
  201. }
  202. }
  203. currentToken = ['comment', css.slice(pos, next + 1), pos, next]
  204. pos = next
  205. } else {
  206. RE_WORD_END.lastIndex = pos + 1
  207. RE_WORD_END.test(css)
  208. if (RE_WORD_END.lastIndex === 0) {
  209. next = css.length - 1
  210. } else {
  211. next = RE_WORD_END.lastIndex - 2
  212. }
  213. currentToken = ['word', css.slice(pos, next + 1), pos, next]
  214. buffer.push(currentToken)
  215. pos = next
  216. }
  217. break
  218. }
  219. }
  220. pos++
  221. return currentToken
  222. }
  223. function back(token) {
  224. returned.push(token)
  225. }
  226. return {
  227. back,
  228. endOfFile,
  229. nextToken,
  230. position
  231. }
  232. }