Merge "Merge branch 'Wikidata' into master."
[lhc/web/wiklou.git] / maintenance / cssjanus / cssjanus.py
1 #!/usr/bin/python
2 #
3 # Copyright 2008 Google Inc. All Rights Reserved.
4
5 """Converts a LeftToRight Cascading Style Sheet into a RightToLeft one.
6
7 This is a utility script for replacing "left" oriented things in a CSS file
8 like float, padding, margin with "right" oriented values.
9 It also does the opposite.
10 The goal is to be able to conditionally serve one large, cat'd, compiled CSS
11 file appropriate for LeftToRight oriented languages and RightToLeft ones.
12 This utility will hopefully help your structural layout done in CSS in
13 terms of its RTL compatibility. It will not help with some of the more
14 complicated bidirectional text issues.
15 """
16
17 __author__ = 'elsigh@google.com (Lindsey Simon)'
18 __version__ = '0.1'
19
20 import logging
21 import re
22 import sys
23 import getopt
24 import os
25
26 import csslex
27
# Report INFO and above on the root logger by default.
logging.getLogger().setLevel(logging.INFO)

# Defaults for the command line flags, followed by the global dict that holds
# the values currently in effect.
SWAP_LTR_RTL_IN_URL_DEFAULT = False
SWAP_LEFT_RIGHT_IN_URL_DEFAULT = False
FLAGS = {}
FLAGS['swap_ltr_rtl_in_url'] = SWAP_LTR_RTL_IN_URL_DEFAULT
FLAGS['swap_left_right_in_url'] = SWAP_LEFT_RIGHT_IN_URL_DEFAULT

# Character that opens and closes every placeholder token.
TOKEN_DELIMITER = '~'

# Temporary placeholder used while two strings are being swapped.
TMP_TOKEN = TOKEN_DELIMITER + 'TMP' + TOKEN_DELIMITER

# Placeholder used to join the input lines into one string.
TOKEN_LINES = TOKEN_DELIMITER + 'J' + TOKEN_DELIMITER

# Text of the CSS values that get swapped.
LTR, RTL = 'ltr', 'rtl'
LEFT, RIGHT = 'left', 'right'
50
# This is a lookbehind match to ensure that we don't replace instances
# of our string token (left, rtl, etc...) if there's a letter in front of it.
# Specifically, this prevents replacements like 'background: url(bright.png)'.
# Only ASCII letters are excluded; digits, '-' and '_' in front of the token
# still allow a match.
LOOKBEHIND_NOT_LETTER = r'(?<![a-zA-Z])'

# This is a lookahead match to make sure we don't replace left and right
# in actual classnames, so that we don't break the HTML/CSS dependencies.
# Read literally, it says ignore cases where the word left, for instance, is
# directly followed by valid classname characters and a curly brace.
# ex: .column-left {float: left} will become .column-left {float: right}
# The characters tolerated before the brace are csslex.NMCHAR, the line-join
# token TOKEN_LINES, csslex.SPACE and the punctuation # : . , + >
LOOKAHEAD_NOT_OPEN_BRACE = (r'(?!(?:%s|%s|%s|#|\:|\.|\,|\+|>)*?{)' %
                            (csslex.NMCHAR, TOKEN_LINES, csslex.SPACE))


# These two lookaheads are to test whether or not we are within a
# background: url(HERE) situation.
# Ref: http://www.w3.org/TR/CSS21/syndata.html#uri
# VALID_AFTER_URI_CHARS is an optional quote character followed by
# csslex.WHITESPACE; both lookaheads require it right before the ')'.
VALID_AFTER_URI_CHARS = r'[\'\"]?%s' % csslex.WHITESPACE
# Negative form: the text ahead is NOT the tail of a url(...) value.
LOOKAHEAD_NOT_CLOSING_PAREN = r'(?!%s?%s\))' % (csslex.URL_CHARS,
                                                VALID_AFTER_URI_CHARS)
# Positive form: the text ahead IS the tail of a url(...) value.
LOOKAHEAD_FOR_CLOSING_PAREN = r'(?=%s?%s\))' % (csslex.URL_CHARS,
                                                VALID_AFTER_URI_CHARS)
73
# Compile a regex to swap left and right values in 4 part notations.
# We need to match negatives and decimal numeric values.
# ex. 'margin: .25em -2px 3px 0' becomes 'margin: .25em 0 3px -2px'.
# One value is an optionally negated csslex.QUANTITY or one of the keywords
# inherit/auto, wrapped in a single capturing group.
POSSIBLY_NEGATIVE_QUANTITY = r'((?:-?%s)|(?:inherit|auto))' % csslex.QUANTITY
# The same value followed by csslex.SPACE and csslex.WHITESPACE.
POSSIBLY_NEGATIVE_QUANTITY_SPACE = r'%s%s%s' % (POSSIBLY_NEGATIVE_QUANTITY,
                                                csslex.SPACE,
                                                csslex.WHITESPACE)
# Four values in a row. FixFourPartNotation addresses them as groups 1-4,
# which presumes csslex.QUANTITY adds no capturing groups -- TODO confirm.
FOUR_NOTATION_QUANTITY_RE = re.compile(r'%s%s%s%s' %
                                       (POSSIBLY_NEGATIVE_QUANTITY_SPACE,
                                        POSSIBLY_NEGATIVE_QUANTITY_SPACE,
                                        POSSIBLY_NEGATIVE_QUANTITY_SPACE,
                                        POSSIBLY_NEGATIVE_QUANTITY),
                                       re.I)
# A color is either a csslex.NAME or a csslex.HASH, in one capturing group.
COLOR = r'(%s|%s)' % (csslex.NAME, csslex.HASH)
COLOR_SPACE = r'%s%s' % (COLOR, csslex.SPACE)
# Four colors after a property name ending in '-color'. Group 1 is the
# '-color' text with its colon and whitespace; FixFourPartNotation treats
# groups 2-5 as the four colors (presumes csslex.NAME and csslex.HASH add no
# capturing groups -- TODO confirm).
FOUR_NOTATION_COLOR_RE = re.compile(r'(-color%s:%s)%s%s%s(%s)' %
                                    (csslex.WHITESPACE,
                                     csslex.WHITESPACE,
                                     COLOR_SPACE,
                                     COLOR_SPACE,
                                     COLOR_SPACE,
                                     COLOR),
                                    re.I)
97
# Compile the cursor resize regexes
# Group 1 is the optional vertical part ('n' or 's') of the cursor name, which
# FixCursorProperties keeps while it swaps the east/west part. Neither pattern
# is compiled with re.I, so only lowercase cursor names match.
CURSOR_EAST_RE = re.compile(LOOKBEHIND_NOT_LETTER + '([ns]?)e-resize')
CURSOR_WEST_RE = re.compile(LOOKBEHIND_NOT_LETTER + '([ns]?)w-resize')
101
# Matches the condition where we need to replace the horizontal component
# of a background-position value when expressed in horizontal percentage.
# Had to make two regexes because in the case of position-x there is only
# one quantity, and otherwise we don't want to match and change cases with only
# one quantity.
# Groups as read by CalculateNewBackgroundPosition (this numbering presumes
# the csslex fragments add no capturing groups of their own -- TODO confirm):
#   1: the optional '-position' suffix of the property name
#   2: the colon and the csslex.WHITESPACE on either side of it
#   3: any text before the percentage that contains no '%'
#   4: the horizontal percentage number (csslex.NUM)
#   5: csslex.WHITESPACE followed by a csslex.QUANTITY or csslex.IDENT
# Unlike most patterns in this file, neither regex is compiled with re.I.
BG_HORIZONTAL_PERCENTAGE_RE = re.compile(r'background(-position)?(%s:%s)'
                                         '([^%%]*?)(%s)%%'
                                         '(%s(?:%s|%s))' % (csslex.WHITESPACE,
                                                            csslex.WHITESPACE,
                                                            csslex.NUM,
                                                            csslex.WHITESPACE,
                                                            csslex.QUANTITY,
                                                            csslex.IDENT))

# Single-value variant for background-position-x. Group 1 is the colon with
# its surrounding whitespace and group 2 is the percentage number, as read by
# CalculateNewBackgroundPositionX (same csslex caveat as above).
BG_HORIZONTAL_PERCENTAGE_X_RE = re.compile(r'background-position-x(%s:%s)'
                                           '(%s)%%' % (csslex.WHITESPACE,
                                                       csslex.WHITESPACE,
                                                       csslex.NUM))
120
# Matches the opening of a body selector.
BODY_SELECTOR = r'body%s{%s' % (csslex.WHITESPACE, csslex.WHITESPACE)

# Matches anything up until the closing of a selector.
# The match is lazy and excludes only '}', so it may span any other text,
# including the line-join token.
CHARS_WITHIN_SELECTOR = r'[^\}]*?'

# Matches the direction property in a selector.
# This is a pattern string, not a compiled regex, despite the _RE suffix.
DIRECTION_RE = r'direction%s:%s' % (csslex.WHITESPACE, csslex.WHITESPACE)

# These allow us to swap "ltr" with "rtl" and vice versa ONLY within the
# body selector: the match cannot cross the '}' that closes the body block.
# FixBodyDirectionLtrAndRtl keeps groups 1-3 (selector opening, preceding
# declarations, 'direction:') and replaces the final group; that numbering
# presumes csslex.WHITESPACE adds no capturing groups -- TODO confirm.
BODY_DIRECTION_LTR_RE = re.compile(r'(%s)(%s)(%s)(ltr)' %
                                   (BODY_SELECTOR, CHARS_WITHIN_SELECTOR,
                                    DIRECTION_RE),
                                   re.I)
BODY_DIRECTION_RTL_RE = re.compile(r'(%s)(%s)(%s)(rtl)' %
                                   (BODY_SELECTOR, CHARS_WITHIN_SELECTOR,
                                    DIRECTION_RE),
                                   re.I)


# Allows us to swap "direction:ltr" with "direction:rtl" and
# vice versa anywhere in a line.
# NOTE(review): no function visible in this file references these two
# patterns, and they are compiled without re.I.
DIRECTION_LTR_RE = re.compile(r'%s(ltr)' % DIRECTION_RE)
DIRECTION_RTL_RE = re.compile(r'%s(rtl)' % DIRECTION_RE)
146
# We want to be able to switch left with right and vice versa anywhere
# we encounter left/right strings, EXCEPT inside the background:url(). The next
# two regexes are for that purpose. We have alternate IN_URL versions of the
# regexes compiled in case the user passes the flag that they do
# actually want to have left and right swapped inside of background:urls.
# LEFT_RE and RIGHT_RE also refuse to match when the word is part of a
# selector (LOOKAHEAD_NOT_OPEN_BRACE). All six patterns are case-insensitive
# and require that no ASCII letter precedes the word.
LEFT_RE = re.compile('%s(%s)%s%s' % (LOOKBEHIND_NOT_LETTER,
                                     LEFT,
                                     LOOKAHEAD_NOT_CLOSING_PAREN,
                                     LOOKAHEAD_NOT_OPEN_BRACE),
                     re.I)
RIGHT_RE = re.compile('%s(%s)%s%s' % (LOOKBEHIND_NOT_LETTER,
                                      RIGHT,
                                      LOOKAHEAD_NOT_CLOSING_PAREN,
                                      LOOKAHEAD_NOT_OPEN_BRACE),
                      re.I)
# The IN_URL variants match only when the word sits inside a url(...) value.
LEFT_IN_URL_RE = re.compile('%s(%s)%s' % (LOOKBEHIND_NOT_LETTER,
                                          LEFT,
                                          LOOKAHEAD_FOR_CLOSING_PAREN),
                            re.I)
RIGHT_IN_URL_RE = re.compile('%s(%s)%s' % (LOOKBEHIND_NOT_LETTER,
                                           RIGHT,
                                           LOOKAHEAD_FOR_CLOSING_PAREN),
                             re.I)
# Same as above, for the words ltr and rtl inside a url(...) value.
LTR_IN_URL_RE = re.compile('%s(%s)%s' % (LOOKBEHIND_NOT_LETTER,
                                         LTR,
                                         LOOKAHEAD_FOR_CLOSING_PAREN),
                           re.I)
RTL_IN_URL_RE = re.compile('%s(%s)%s' % (LOOKBEHIND_NOT_LETTER,
                                         RTL,
                                         LOOKAHEAD_FOR_CLOSING_PAREN),
                           re.I)
178
# Captures a whole csslex.COMMENT as group 1 so that a Tokenizer can set it
# aside and restore it after the fixes have run.
COMMENT_RE = re.compile('(%s)' % csslex.COMMENT, re.I)

# The annotation keyword, written as a regex fragment.
NOFLIP_TOKEN = r'\@noflip'
# The NOFLIP_TOKEN inside of a comment. For now, this requires that comments
# be in the input, which means users of a css compiler would have to run
# this script first if they want this functionality.
NOFLIP_ANNOTATION = r'/\*%s%s%s\*/' % (csslex.WHITESPACE,
                                       NOFLIP_TOKEN,
                                       csslex. WHITESPACE)

# After a NOFLIP_ANNOTATION, and within a class selector, we want to be able
# to set aside a single rule not to be flipped. We can do this by matching
# our NOFLIP annotation and then using a lookahead to make sure there is not
# an opening brace before the match.
# The captured text runs from the annotation up to, and including, the next
# ';' if there is one; it stops before a '}'.
NOFLIP_SINGLE_RE = re.compile(r'(%s%s[^;}]+;?)' % (NOFLIP_ANNOTATION,
                                                   LOOKAHEAD_NOT_OPEN_BRACE),
                              re.I)

# After a NOFLIP_ANNOTATION, we want to grab anything up until the next } which
# means the entire following class block. This will prevent all of its
# declarations from being flipped.
# The closing '}' itself is part of the captured text.
NOFLIP_CLASS_RE = re.compile(r'(%s%s})' % (NOFLIP_ANNOTATION,
                                           CHARS_WITHIN_SELECTOR),
                             re.I)
203
204
class Tokenizer:
  """Swaps regex matches for numbered placeholder tokens and back again.

  Every match of token_re is replaced by a token made of TOKEN_DELIMITER, the
  token_string, an underscore, the 1-based match count and TOKEN_DELIMITER.
  The matched text is remembered so that DeTokenize can restore it.
  """

  def __init__(self, token_re, token_string):
    """Creates a tokenizer that has not stored any matches yet.

    Args:
      token_re: A compiled regex whose group 1 is the text to set aside.
      token_string: The label placed between the delimiters of every token.
    """
    logging.debug('Tokenizer::init token_string=%s', token_string)
    self.originals = []
    self.token_string = token_string
    self.token_re = token_re

  def _TokenFor(self, number):
    """Builds the placeholder token for the number-th stored match."""
    return '%s%s_%s%s' % (TOKEN_DELIMITER, self.token_string, number,
                          TOKEN_DELIMITER)

  def Tokenize(self, line):
    """Replaces every match of token_re in line with a placeholder token.

    TokenizeMatches is passed to re.sub as the replacement callback, so it is
    called once per match and can hand out a distinct token each time.

    Args:
      line: A string to replace token_re matches in.

    Returns:
      The string with every token_re match tokenized.
    """
    tokenized = self.token_re.sub(self.TokenizeMatches, line)
    logging.debug('Tokenizer::Tokenize returns: %s', tokenized)
    return tokenized

  def DeTokenize(self, line):
    """Puts the stored originals back in place of their tokens.

    Args:
      line: A string containing tokens produced by this tokenizer.

    Returns:
      The string with each token replaced by the text it stood for.
    """
    restored = line
    # Tokens are numbered from 1 in the order their originals were stored.
    for index, stored in enumerate(self.originals):
      placeholder = self._TokenFor(index + 1)
      restored = restored.replace(placeholder, stored)
      logging.debug('Tokenizer::DeTokenize i:%s w/%s', index, placeholder)
    logging.debug('Tokenizer::DeTokenize returns: %s', restored)
    return restored

  def TokenizeMatches(self, m):
    """Stores one match and returns the token that replaces it.

    Args:
      m: A match object whose group 1 is the text to set aside.

    Returns:
      The placeholder token numbered after the newly stored original.
    """
    captured = m.group(1)
    logging.debug('Tokenizer::TokenizeMatches %s', captured)
    self.originals.append(captured)
    return self._TokenFor(len(self.originals))
271
272
def FixBodyDirectionLtrAndRtl(line):
  """Swaps ltr and rtl in the direction property of body selectors only.

  Args:
    line: A string of CSS.

  Returns:
    line with the value of direction swapped between ltr and rtl wherever
    BODY_DIRECTION_LTR_RE or BODY_DIRECTION_RTL_RE matches. For example:
    line = FixBodyDirectionLtrAndRtl('body { direction:ltr }')
    line will now be 'body { direction:rtl }'.
  """
  # Everything in front of the direction value is written back unchanged.
  keep_prefix = r'\1\2\3'

  # Park ltr on a temporary token first, otherwise the rtl -> ltr pass would
  # be undone by a following ltr -> rtl pass.
  parked = BODY_DIRECTION_LTR_RE.sub(keep_prefix + TMP_TOKEN, line)
  flipped = BODY_DIRECTION_RTL_RE.sub(keep_prefix + LTR, parked)
  result = flipped.replace(TMP_TOKEN, RTL)
  logging.debug('FixBodyDirectionLtrAndRtl returns: %s', result)
  return result
289
290
def FixLeftAndRight(line):
  """Swaps the words left and right wherever LEFT_RE / RIGHT_RE match.

  Args:
    line: A string of CSS.

  Returns:
    line with left and right exchanged. The replacement text is always the
    lowercase word. For example:
    line = FixLeftAndRight('padding-left: 2px; margin-right: 1px;')
    line will now be 'padding-right: 2px; margin-left: 1px;'.
  """
  # left is parked on a temporary token so the right -> left pass cannot
  # touch it; the token becomes right at the end.
  parked = LEFT_RE.sub(TMP_TOKEN, line)
  swapped = RIGHT_RE.sub(LEFT, parked).replace(TMP_TOKEN, RIGHT)
  logging.debug('FixLeftAndRight returns: %s', swapped)
  return swapped
308
309
def FixLeftAndRightInUrl(line):
  """Swaps the words left and right inside url(...) values only.

  Args:
    line: A string of CSS.

  Returns:
    line with left and right exchanged wherever LEFT_IN_URL_RE or
    RIGHT_IN_URL_RE matches. For example:
    line = FixLeftAndRightInUrl('background:url(right.png)')
    line will now be 'background:url(left.png)'.
  """
  # Three-step swap through a temporary token, as in FixLeftAndRight.
  parked = LEFT_IN_URL_RE.sub(TMP_TOKEN, line)
  swapped = RIGHT_IN_URL_RE.sub(LEFT, parked).replace(TMP_TOKEN, RIGHT)
  logging.debug('FixLeftAndRightInUrl returns: %s', swapped)
  return swapped
327
328
def FixLtrAndRtlInUrl(line):
  """Swaps the words ltr and rtl inside url(...) values only.

  Args:
    line: A string of CSS.

  Returns:
    line with ltr and rtl exchanged wherever LTR_IN_URL_RE or RTL_IN_URL_RE
    matches. For example:
    line = FixLtrAndRtlInUrl('background:url(rtl.png)')
    line will now be 'background:url(ltr.png)'.
  """
  # ltr waits on a temporary token while rtl is turned into ltr.
  parked = LTR_IN_URL_RE.sub(TMP_TOKEN, line)
  swapped = RTL_IN_URL_RE.sub(LTR, parked).replace(TMP_TOKEN, RTL)
  logging.debug('FixLtrAndRtlInUrl returns: %s', swapped)
  return swapped
346
347
def FixCursorProperties(line):
  """Swaps the east and west variants of the CSS resize cursors.

  Args:
    line: A string of CSS.

  Returns:
    line with e-resize/w-resize exchanged; an 'n' or 's' in front of the
    cursor name is kept. For example:
    line = FixCursorProperties('cursor: ne-resize')
    line will now be 'cursor: nw-resize'.
  """
  # Group 1 is the optional n/s prefix and is written back as it was.
  vertical_part = r'\1'

  parked = CURSOR_EAST_RE.sub(vertical_part + TMP_TOKEN, line)
  flipped = CURSOR_WEST_RE.sub(vertical_part + 'e-resize', parked)
  result = flipped.replace(TMP_TOKEN, 'w-resize')
  logging.debug('FixCursorProperties returns: %s', result)
  return result
365
366
def FixFourPartNotation(line):
  """Exchanges the second and fourth values of 4 part CSS notations.

  Both the quantity form and the '-color' form are handled. The values are
  written back separated by single spaces.

  Args:
    line: A string of CSS.

  Returns:
    line with the second and fourth values swapped. For example:
    line = FixFourPartNotation('padding: 1px 2px 3px 4px')
    line will now be 'padding: 1px 4px 3px 2px'.
  """
  # Quantities: groups 1-4 are the four values.
  quantity_order = r'\1 \4 \3 \2'
  # Colors: group 1 is the '-color:' prefix, groups 2-5 the four values.
  color_order = r'\1\2 \5 \4 \3'

  swapped = FOUR_NOTATION_QUANTITY_RE.sub(quantity_order, line)
  swapped = FOUR_NOTATION_COLOR_RE.sub(color_order, swapped)
  logging.debug('FixFourPartNotation returns: %s', swapped)
  return swapped
382
383
def FixBackgroundPosition(line):
  """Mirrors horizontal background position percentages in line.

  Args:
    line: A string of CSS.

  Returns:
    line with the horizontal percentage of background, background-position
    and background-position-x values replaced by its offset from 100%.
  """
  # The multi-value form runs first, then the single-value position-x form.
  mirrored = BG_HORIZONTAL_PERCENTAGE_RE.sub(CalculateNewBackgroundPosition,
                                             line)
  mirrored = BG_HORIZONTAL_PERCENTAGE_X_RE.sub(CalculateNewBackgroundPositionX,
                                               mirrored)
  logging.debug('FixBackgroundPosition returns: %s', mirrored)
  return mirrored
398
399
def CalculateNewBackgroundPosition(m):
  """Fixes horizontal background-position percentages.

  This function should be used as an argument to re.sub since it needs to
  perform replacement specific calculations.

  Args:
    m: A match object whose groups are: 1 the optional '-position' suffix of
      the property name, 2 the colon with its surrounding whitespace, 3 any
      text before the percentage, 4 the horizontal percentage number and
      5 the text that follows the '%' sign.

  Returns:
    A string with the horizontal background position percentage fixed.
    BG_HORIZONTAL_PERCENTAGE_RE.sub(CalculateNewBackgroundPosition,
                                    'background-position: 75% 50%')
    will return 'background-position: 25% 50%'.
  """

  # The flipped value is the offset from 100%
  raw_x = m.group(4)
  try:
    new_x = str(100 - int(raw_x))
  except ValueError:
    # int() rejects decimal percentages such as '12.5'. Decimal keeps the
    # subtraction exact, and the 'f' format avoids exponent notation.
    from decimal import Decimal
    new_x = format(Decimal(100) - Decimal(raw_x), 'f')

  # Since m.group(1) may very well be None type and we need a string..
  position_string = m.group(1) or ''

  return 'background%s%s%s%s%%%s' % (position_string, m.group(2), m.group(3),
                                     new_x, m.group(5))
427
428
def CalculateNewBackgroundPositionX(m):
  """Fixes percent based background-position-x.

  This function should be used as an argument to re.sub since it needs to
  perform replacement specific calculations.

  Args:
    m: A match object whose group 1 is the colon with its surrounding
      whitespace and whose group 2 is the percentage number.

  Returns:
    A string with the background-position-x percentage fixed.
    BG_HORIZONTAL_PERCENTAGE_X_RE.sub(CalculateNewBackgroundPositionX,
                                      'background-position-x: 75%')
    will return 'background-position-x: 25%'.
  """

  # The flipped value is the offset from 100%
  raw_x = m.group(2)
  try:
    new_x = str(100 - int(raw_x))
  except ValueError:
    # int() rejects decimal percentages such as '12.5'. Decimal keeps the
    # subtraction exact, and the 'f' format avoids exponent notation.
    from decimal import Decimal
    new_x = format(Decimal(100) - Decimal(raw_x), 'f')

  return 'background-position-x%s%s%%' % (m.group(1), new_x)
449
450
def ChangeLeftToRightToLeft(lines,
                            swap_ltr_rtl_in_url=None,
                            swap_left_right_in_url=None):
  """Joins lines into one string, runs every fix on it and splits it again.

  Args:
    lines: A list of CSS lines.
    swap_ltr_rtl_in_url: Overrides FLAGS['swap_ltr_rtl_in_url'] unless None.
    swap_left_right_in_url: Overrides FLAGS['swap_left_right_in_url'] unless
      None.

  Returns:
    A list of lines with the left/right fixes applied.
  """
  logging.debug('ChangeLeftToRightToLeft swap_ltr_rtl_in_url=%s, '
                'swap_left_right_in_url=%s',
                swap_ltr_rtl_in_url, swap_left_right_in_url)

  # A parameter left at None falls back to the global flag.
  if swap_ltr_rtl_in_url is None:
    swap_ltr_rtl_in_url = FLAGS['swap_ltr_rtl_in_url']
  if swap_left_right_in_url is None:
    swap_left_right_in_url = FLAGS['swap_left_right_in_url']

  # All lines are processed as one string, joined by a placeholder token.
  logging.debug('LINES COUNT: %s', len(lines))
  stream = TOKEN_LINES.join(lines)

  # Set aside text that must survive untouched: single rules annotated with
  # noflip, whole blocks annotated with noflip, then all remaining comments.
  protectors = []
  for pattern, label in ((NOFLIP_SINGLE_RE, 'NOFLIP_SINGLE'),
                         (NOFLIP_CLASS_RE, 'NOFLIP_CLASS'),
                         (COMMENT_RE, 'C')):
    protector = Tokenizer(pattern, label)
    stream = protector.Tokenize(stream)
    protectors.append(protector)

  # Here starteth the various left/right orientation fixes.
  stream = FixBodyDirectionLtrAndRtl(stream)

  if swap_left_right_in_url:
    stream = FixLeftAndRightInUrl(stream)

  if swap_ltr_rtl_in_url:
    stream = FixLtrAndRtlInUrl(stream)

  for fixer in (FixLeftAndRight, FixCursorProperties, FixFourPartNotation,
                FixBackgroundPosition):
    stream = fixer(stream)

  # Restore the protected text in the order it was set aside.
  for protector in protectors:
    stream = protector.DeTokenize(stream)

  # Cut the string back into the original lines.
  return stream.split(TOKEN_LINES)
519
def usage():
  """Prints out usage information.

  The text is written to standard output with a single print call whose
  parenthesized form behaves the same on Python 2 and Python 3.
  """
  help_lines = (
      'Usage:',
      ' ./cssjanus.py < file.css > file-rtl.css',
      'Flags:',
      ' --swap_left_right_in_url: Fixes "left"/"right" string within urls.',
      ' Ex: ./cssjanus.py --swap_left_right_in_url < file.css > file_rtl.css',
      ' --swap_ltr_rtl_in_url: Fixes "ltr"/"rtl" string within urls.',
      ' Ex: ./cssjanus.py --swap_ltr_rtl_in_url < file.css > file_rtl.css',
  )
  print('\n'.join(help_lines))
530
def setflags(opts):
  """Applies parsed command line options to the logger and to FLAGS.

  -h/--help prints the usage text and exits. -d/--debug sets the root logger
  to DEBUG. The two --swap_..._in_url options set the matching FLAGS entry to
  True. Any other option is ignored.

  Args:
    opts: An iterable of (option, argument) pairs as returned by getopt.
  """
  for opt, arg in opts:
    logging.debug('opt: %s, arg: %s', opt, arg)

    if opt in ('-h', '--help'):
      usage()
      sys.exit()

    if opt in ('-d', '--debug'):
      logging.getLogger().setLevel(logging.DEBUG)
      continue

    if opt == '--swap_ltr_rtl_in_url':
      FLAGS['swap_ltr_rtl_in_url'] = True
    elif opt == '--swap_left_right_in_url':
      FLAGS['swap_left_right_in_url'] = True
552
553
def main(argv):
  """Reads CSS from stdin, flips it and writes the result to stdout.

  Args:
    argv: The command line arguments without the program name.
  """
  # The options this script understands.
  long_options = ['help', 'debug',
                  'swap_left_right_in_url',
                  'swap_ltr_rtl_in_url']
  try:
    opts, _ = getopt.getopt(argv, 'hd', long_options)
  except getopt.GetoptError:
    # An unrecognised option: show the usage text and exit with status 2.
    usage()
    sys.exit(2)

  # Apply the options to the global state.
  setflags(opts)

  # Flip everything read from stdin and write it out in one piece.
  css_lines = sys.stdin.readlines()
  sys.stdout.write(''.join(ChangeLeftToRightToLeft(css_lines)))
572
# Run the command line entry point only when this file is executed as a
# script; the program name is dropped from the arguments handed to main().
if __name__ == '__main__':
  main(sys.argv[1:])