1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 from collections
import defaultdict
8 from future
import Future
9 from path_util
import SplitParent
10 from special_paths
import SITE_VERIFICATION_FILE
12 def _Normalize(file_name
, splittext
=False):
13 normalized
= file_name
15 normalized
= posixpath
.splitext(file_name
)[0]
16 normalized
= normalized
.replace('.', '').replace('-', '').replace('_', '')
17 return normalized
.lower()
def _CommonNormalizedPrefix(first_file, second_file):
  '''Returns the longest leading string shared by the normalized forms (see
  _Normalize) of |first_file| and |second_file|.
  '''
  normalized_names = [_Normalize(name) for name in (first_file, second_file)]
  return posixpath.commonprefix(normalized_names)
24 class PathCanonicalizer(object):
25 '''Transforms paths into their canonical forms. Since the docserver has had
26 many incarnations - e.g. there didn't use to be apps/ - there may be old
27 paths lying around the webs. We try to redirect those to where they are now.
33 # |strip_extensions| is a list of file extensions (e.g. .html) that should
34 # be stripped for a path's canonical form.
35 self
._cache
= object_store_creator
.Create(
36 PathCanonicalizer
, category
=file_system
.GetIdentity())
37 self
._file
_system
= file_system
38 self
._strip
_extensions
= strip_extensions
42 # |canonical_paths| is the pre-calculated set of canonical paths.
43 # |simplified_paths_map| is a lazily populated mapping of simplified file
44 # names to a list of full paths that contain them. For example,
45 # - browseraction: [extensions/browserAction.html]
46 # - storage: [apps/storage.html, extensions/storage.html]
47 canonical_paths
, simplified_paths_map
= (
48 cached
.get('canonical_paths'), cached
.get('simplified_paths_map'))
# Cache miss: walk the file system and build both structures from scratch.
50 if canonical_paths
is None:
51 assert simplified_paths_map
is None
52 canonical_paths
= set()
53 simplified_paths_map
= defaultdict(list)
54 for base
, dirs
, files
in self
._file
_system
.Walk(''):
55 for path
in dirs
+ files
:
56 path_without_ext
, ext
= posixpath
.splitext(path
)
57 canonical_path
= posixpath
.join(base
, path_without_ext
)
58 if (ext
not in self
._strip
_extensions
or
59 path
== SITE_VERIFICATION_FILE
):
61 canonical_paths
.add(canonical_path
)
62 simplified_paths_map
[_Normalize(path
, splittext
=True)].append(
64 # Sort every path list in |simplified_paths_map| by length first and then
65 # lexicographically, so the shortest path comes first and ties in length
# are broken by taking the lexicographically smallest path.
# NOTE: itervalues() is the Python 2 dict API.
66 for path_list
in simplified_paths_map
.itervalues():
67 path_list
.sort(key
=lambda p
: (len(p
), p
))
68 self
._cache
.SetMulti({
69 'canonical_paths': canonical_paths
,
70 'simplified_paths_map': simplified_paths_map
,
73 assert simplified_paths_map
is not None
75 return canonical_paths
, simplified_paths_map
76 return self
._cache
.GetMulti(('canonical_paths',
77 'simplified_paths_map')).Then(load
)
80 def Canonicalize(self
, path
):
81 '''Returns the canonical path for |path|.
83 canonical_paths
, simplified_paths_map
= self
._LoadCache
().Get()
85 # Path may already be the canonical path.
86 if path
in canonical_paths
:
89 # Path not found. Our single heuristic: find |base| in the directory
90 # structure with the longest common prefix of |path|.
91 _
, base
= SplitParent(path
)
93 # Paths with a non-extension dot separator lose information in
94 # _Normalize, so we try paths both with and without the dot to
95 # maximize the possibility of finding the right path.
97 simplified_paths_map
.get(_Normalize(base
), []) +
98 simplified_paths_map
.get(_Normalize(base
, splittext
=True), []))
100 if potential_paths
== []:
101 # There is no file with anything close to that name.
104 # The most likely canonical file is the one with the longest common prefix
105 # with |path|. This is slightly weaker than it could be; |path| is
106 # compared without symbols, not the simplified form of |path|.
108 max_prefix
= potential_paths
[0]
109 max_prefix_length
= len(_CommonNormalizedPrefix(max_prefix
, path
))
110 for path_for_file
in potential_paths
[1:]:
111 prefix_length
= len(_CommonNormalizedPrefix(path_for_file
, path
))
112 if prefix_length
> max_prefix_length
:
113 max_prefix
, max_prefix_length
= path_for_file
, prefix_length
118 return self
._LoadCache
()