1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Process Chrome resources (HTML/CSS/JS) to handle <include> and <if> tags."""
from collections import defaultdict
import os
import re
class LineNumber(object):
    """A simple wrapper to hold line information (e.g. file.js:32).

    Args:
        source_file: A file path.
        line_number: The line in |source_file|; any value int() accepts.

    Attributes:
        file: The path given as |source_file|.
        line_number: |line_number| converted to an int.
    """

    def __init__(self, source_file, line_number):
        self.file = source_file
        # int() lets callers pass the number either as an int or as text.
        self.line_number = int(line_number)

    def __repr__(self):
        """Return a debugging representation, e.g. LineNumber('file.js', 32)."""
        return "%s(%r, %r)" % (
            type(self).__name__, self.file, self.line_number)
class FileCache(object):
    """An in-memory cache to speed up reading the same files over and over.

    Usage:
        FileCache.read(path_to_file)
    """

    # Maps absolute path -> file contents. It lives on the class, so every
    # caller shares the same cache.
    _cache = defaultdict(str)

    @classmethod
    def read(cls, source_file):
        """Read a file and return it as a string.

        The file is read from disk only the first time its absolute path is
        requested; later calls return the cached contents.

        Args:
            source_file: a file to read and return the contents of.

        Returns:
            The contents of |source_file| as a string.

        Raises:
            IOError: if the file cannot be opened or read.
        """
        abs_file = os.path.abspath(source_file)
        # Test membership rather than truthiness of the cached value so that
        # empty files are cached too instead of being re-read on every call.
        if abs_file not in cls._cache:
            # The context manager closes the handle even if read() fails.
            with open(abs_file, "r") as handle:
                cls._cache[abs_file] = handle.read()
        return cls._cache[abs_file]
class Processor(object):
    """Inlines <include> tags and strips <if> tags, keeping line origins.

    For example

        1: /* blah.js */
        2: <if expr="is_win">
        3: <include src="win.js">
        4: </if>

    would be turned into:

        1: /* blah.js */
        2:
        3: /* win.js */
        4: alert('Ew; Windows.');
        5:

    Args:
        source_file: A file to process.

    Attributes:
        contents: Expanded contents after inlining <include>s and stripping
            <if>s.
        included_files: The set of absolute paths of the files that were
            inlined via <include>.
    """

    _IF_TAGS_REG = "</?if[^>]*?>"
    _INCLUDE_REG = "<include[^>]+src=['\"]([^>]*)['\"]>"

    def __init__(self, source_file):
        self._included_files = set()
        # Position in |self._lines| of the line currently being examined.
        self._index = 0
        self._lines = self._get_file(source_file)

        while self._index < len(self._lines):
            current_file, _, text = self._lines[self._index]
            match = re.search(self._INCLUDE_REG, text)
            if match:
                # The src attribute is resolved relative to the including file.
                file_dir = os.path.dirname(current_file)
                file_name = os.path.abspath(
                    os.path.join(file_dir, match.group(1)))
                if file_name not in self._included_files:
                    self._include_file(file_name)
                    # Stay on the same line: it now holds the first line of
                    # the inlined file, which may itself be an <include>.
                    continue
                # Found a duplicate <include>. Ignore and insert a blank line
                # to preserve line numbers.
                self._lines[self._index] = (
                    self._lines[self._index][:2] + ("",))
            self._index += 1

        # <if> tags are removed only after all includes are expanded, so tags
        # coming from inlined files are stripped as well.
        self._lines = [
            (path, lnum, re.sub(self._IF_TAGS_REG, "", text))
            for path, lnum, text in self._lines
        ]

        self.contents = "\n".join(text for _, _, text in self._lines)

    def _get_file(self, source_file):
        """Return |source_file| as (file, line number, line contents) tuples.

        Line numbers start at 1.
        """
        lines = FileCache.read(source_file).splitlines()
        return [(source_file, lnum, line)
                for lnum, line in enumerate(lines, 1)]

    def _include_file(self, source_file):
        """Replace the line at |self._index| with the lines of |source_file|.

        Also records |source_file| as included.
        """
        self._included_files.add(source_file)
        # Slice assignment splices the included lines over the <include> line
        # in place rather than rebuilding the whole list.
        self._lines[self._index:self._index + 1] = self._get_file(source_file)

    def get_file_from_line(self, line_number):
        """Get the original file and line number for an expanded file's line.

        Args:
            line_number: A processed file's line number (1-based).

        Returns:
            A LineNumber with the originating file and the line within it.

        Raises:
            IndexError: if |line_number| is outside the processed contents.
        """
        index = int(line_number) - 1
        if index < 0:
            # A negative index would silently wrap around to the end of the
            # list and report an unrelated line.
            raise IndexError("line number out of range: %s" % line_number)
        source_file, original_line = self._lines[index][:2]
        return LineNumber(source_file, original_line)

    @property
    def included_files(self):
        """The set of absolute paths of files inlined via <include>."""
        return self._included_files