3 # Wireshark - Network traffic analyzer
4 # By Gerald Combs <gerald@wireshark.org>
5 # Copyright 1998 Gerald Combs
7 # SPDX-License-Identifier: GPL-2.0-or-later
8 '''Update the "manuf" file.
10 Make-manuf creates a file containing ethernet OUIs and their company
11 IDs from the databases at IEEE.
20 import urllib
.request
, urllib
.error
, urllib
.parse
24 # Use the grapheme or segments module instead?
30 def exit_msg(msg
=None, status
=1):
32 sys
.stderr
.write(msg
+ '\n\n')
33 sys
.stderr
.write(__doc__
+ '\n')
38 Returns the body of the response. The body is a
39 str in Python 3 and bytes in Python 2 in order to be compatible with
44 url_path
= os
.path
.join(sys
.argv
[1], url
[1])
45 url_fd
= open(url_path
)
49 url_path
= '/'.join(url
)
51 req_headers
= { 'User-Agent': 'Wireshark make-manuf' }
53 req
= urllib
.request
.Request(url_path
, headers
=req_headers
)
54 response
= urllib
.request
.urlopen(req
)
55 body
= response
.read().decode('UTF-8', 'replace').replace(u
'\u200e', '')
57 exit_msg('Error opening ' + url_path
)
61 # These are applied after punctuation has been removed.
62 # More examples at https://en.wikipedia.org/wiki/Incorporation_(business)
63 general_terms
= '|'.join([
64 ' a +s\\b', # A/S and A.S. but not "As" as in "Connect As".
65 ' ab\\b', # Also follows "Oy", which is covered below.
68 ' closed joint stock company\\b',
74 ' de c ?v\\b', # Follows "S.A.", which is covered separately below.
81 ' k k\\b', # "K.K." as in "kabushiki kaisha", but not "K+K" as in "K+K Messtechnik".
88 ' open joint stock company\\b',
107 # Chinese company names tend to start with the location; skip it (non-exhaustive list).
120 # Special cases handled directly
122 "Advanced Micro Devices": "AMD",
123 "杭州德澜科技有限公司": "DelanTech" # 杭州德澜科技有限公司(HangZhou Delan Technology Co.,Ltd)
127 '''Convert a long manufacturer name to abbreviated and short names'''
128 # Normalize whitespace.
129 manuf
= ' '.join(manuf
.split())
131 # Convert all caps to title case
133 manuf
= manuf
.title()
134 # Remove the contents of parentheses as ancillary data
135 manuf
= re
.sub(r
"\(.*\)", '', manuf
)
136 # Remove the contents of fullwidth parentheses (mostly in Asian names)
137 manuf
= re
.sub(r
"(.*)", '', manuf
)
138 # Remove "a" before removing punctuation ("Aruba, a Hewlett [...]" etc.)
139 manuf
= manuf
.replace(" a ", " ")
140 # Remove any punctuation
141 # XXX Use string.punctuation? Note that it includes '-' and '*'.
142 manuf
= re
.sub(r
"[\"',./:()+-]", ' ', manuf)
143 # XXX For some reason including the double angle brackets in the above
144 # regex makes it bomb
145 manuf = re.sub(r"[«»“”]", ' ', manuf)
146 # A standalone & isn't needed
147 manuf
= manuf
.replace(" & ", " ")
148 # Remove business types and other general terms ("the", "inc", "plc", etc.)
149 plain_manuf
= re
.sub(general_terms
, '', manuf
, flags
=re
.IGNORECASE
)
150 # ...but make sure we don't remove everything.
151 if not all(s
== ' ' for s
in plain_manuf
):
154 manuf
= manuf
.strip()
156 # Check for special case
157 if manuf
in special_case
.keys():
158 manuf
= special_case
[manuf
]
160 # XXX: Some of the entries have Chinese city or other location
161 # names written with spaces between each character, like
162 # Bei jing, Wu Han, Shen Zhen, etc. We should remove that too.
163 split
= manuf
.split()
164 if len(split
) > 1 and split
[0].lower() in skip_start
:
165 manuf
= ' '.join(split
[1:])
168 manuf
= re
.sub(r
'\s+', '', manuf
)
171 sys
.stderr
.write('Manufacturer "{}" shortened to nothing.\n'.format(orig_manuf
))
174 # Truncate names to a reasonable length, say, 12 characters. If
175 # the string contains UTF-8, this may be substantially more than
176 # 12 bytes. It might also be less than 12 visible characters. Plain
177 # Python slices Unicode strings by code point, which is better
178 # than raw bytes but not as good as grapheme clusters. PyICU
179 # supports grapheme clusters. https://bugs.python.org/issue30717
182 # Truncate by code points
186 # Truncate by grapheme clusters
187 bi_ci
= icu
.BreakIterator
.createCharacterInstance(icu
.Locale('en_US'))
190 bounds
= bounds
[0:trunc_len
]
191 trunc_len
= bounds
[-1]
193 manuf
= manuf
[:trunc_len
]
195 if manuf
.lower() == orig_manuf
.lower():
196 # Original manufacturer name was short and simple.
199 mixed_manuf
= orig_manuf
200 # At least one entry has whitespace in front of a period.
201 mixed_manuf
= re
.sub(r
'\s+\.', '.', mixed_manuf
)
202 # If the company name is all caps, convert it to title case (so it doesn't look like we're shouting the company name)
203 if mixed_manuf
.upper() == mixed_manuf
:
204 mixed_manuf
= mixed_manuf
.title()
206 return [manuf
, mixed_manuf
]
212 def prefix_to_oui(prefix
, prefix_map
):
213 pfx_len
= int(len(prefix
) * 8 / 2)
214 prefix24
= prefix
[:6]
215 oui24
= ':'.join(hi
+ lo
for hi
, lo
in zip(prefix24
[0::2], prefix24
[1::2]))
218 # 24-bit OUI assignment, no mask
221 # Other lengths which require a mask.
222 oui
= prefix
.ljust(12, '0')
223 oui
= ':'.join(hi
+ lo
for hi
, lo
in zip(oui
[0::2], oui
[1::2]))
228 prefix_map
[oui24
] = kind
230 return '{}/{:d}'.format(oui
, int(pfx_len
)), kind
233 manuf_path
= os
.path
.join(os
.path
.dirname(__file__
), '..', 'epan', 'manuf-data.c')
236 'OUI': { 'url': ["https://standards-oui.ieee.org/oui/", "oui.csv"], 'min_entries': 1000 },
237 'CID': { 'url': ["https://standards-oui.ieee.org/cid/", "cid.csv"], 'min_entries': 75 },
238 'IAB': { 'url': ["https://standards-oui.ieee.org/iab/", "iab.csv"], 'min_entries': 1000 },
239 'OUI28': { 'url': ["https://standards-oui.ieee.org/oui28/", "mam.csv"], 'min_entries': 1000 },
240 'OUI36': { 'url': ["https://standards-oui.ieee.org/oui36/", "oui36.csv"], 'min_entries': 1000 },
243 MA_L
: { '00:00:00' : ['00:00:00', 'Officially Xerox, but 0:0:0:0:0:0 is more common'] },
248 min_total
= 35000 # 35830 as of 2018-09-05
251 # Add IEEE entries from each of their databases
252 ieee_db_l
= ['OUI', 'OUI28', 'OUI36', 'CID', 'IAB']
254 # map a 24-bit prefix to MA-M/MA-S or none (MA-L by default)
258 db_url
= ieee_d
[db
]['url']
259 ieee_d
[db
]['skipped'] = 0
260 ieee_d
[db
]['added'] = 0
261 ieee_d
[db
]['total'] = 0
262 print('Merging {} data from {}'.format(db
, db_url
))
263 body
= open_url(db_url
)
264 ieee_csv
= csv
.reader(body
.splitlines())
268 for ieee_row
in ieee_csv
:
269 #Registry,Assignment,Organization Name,Organization Address
270 #IAB,0050C2DD6,Transas Marine Limited,Datavagen 37 Askim Vastra Gotaland SE 436 32
271 oui
, kind
= prefix_to_oui(ieee_row
[1].upper(), prefix_map
)
272 manuf
= ieee_row
[2].strip()
273 # The Organization Name field occasionally contains HTML entities. Undo them.
274 manuf
= html
.unescape(manuf
)
276 manuf
= manuf
.replace('\\', '/')
277 if manuf
== 'IEEE Registration Authority':
278 # These are held for subdivision into MA-M/MA-S
280 #if manuf == 'Private':
282 if oui
in oui_d
[kind
]:
284 print('{} - {} IEEE "{}" in favor of "{}"'.format(oui
, action
, manuf
, oui_d
[kind
][oui
]))
285 ieee_d
[db
]['skipped'] += 1
287 oui_d
[kind
][oui
] = shorten(manuf
)
288 ieee_d
[db
]['added'] += 1
289 ieee_d
[db
]['total'] += 1
291 if ieee_d
[db
]['total'] < ieee_d
[db
]['min_entries']:
292 exit_msg("Too few {} entries. Got {}, wanted {}".format(db
, ieee_d
[db
]['total'], ieee_d
[db
]['min_entries']))
293 total_added
+= ieee_d
[db
]['total']
295 if total_added
< min_total
:
296 exit_msg("Too few total entries ({})".format(total_added
))
299 manuf_fd
= io
.open(manuf_path
, 'w', encoding
='UTF-8')
301 exit_msg("Couldn't open manuf file for reading ({}) ".format(manuf_path
))
304 * This file was generated by running ./tools/make-manuf.py.
306 * SPDX-License-Identifier: GPL-2.0-or-later
308 * The data below has been assembled from the following sources:
310 * The IEEE public OUI listings available from:
311 * <http://standards-oui.ieee.org/oui/oui.csv>
312 * <http://standards-oui.ieee.org/cid/cid.csv>
313 * <http://standards-oui.ieee.org/iab/iab.csv>
314 * <http://standards-oui.ieee.org/oui28/mam.csv>
315 * <http://standards-oui.ieee.org/oui36/oui36.csv>
321 # Write the prefix map
322 manuf_fd
.write("static const manuf_registry_t ieee_registry_table[] = {\n")
323 keys
= list(prefix_map
.keys())
326 manuf_fd
.write(" {{ {{ 0x{}, 0x{}, 0x{} }}, {} }},\n".format(oui
[0:2], oui
[3:5], oui
[6:8], prefix_map
[oui
]))
327 manuf_fd
.write("};\n\n")
329 # write the MA-L table
330 manuf_fd
.write("static const manuf_oui24_t global_manuf_oui24_table[] = {\n")
331 keys
= list(oui_d
[MA_L
].keys())
334 short
= oui_d
[MA_L
][oui
][0]
335 if oui_d
[MA_L
][oui
][1]:
336 long = oui_d
[MA_L
][oui
][1]
339 line
= " {{ {{ 0x{}, 0x{}, 0x{} }}, \"{}\", ".format(oui
[0:2], oui
[3:5], oui
[6:8], short
)
344 line
+= "\"{}\" }},\n".format(long.replace('"', '\\"'))
346 manuf_fd
.write("};\n\n")
348 # write the MA-M table
349 manuf_fd
.write("static const manuf_oui28_t global_manuf_oui28_table[] = {\n")
350 keys
= list(oui_d
[MA_M
].keys())
353 short
= oui_d
[MA_M
][oui
][0]
354 if oui_d
[MA_M
][oui
][1]:
355 long = oui_d
[MA_M
][oui
][1]
358 line
= " {{ {{ 0x{}, 0x{}, 0x{}, 0x{} }}, \"{}\", ".format(oui
[0:2], oui
[3:5], oui
[6:8], oui
[9:11], short
)
363 line
+= "\"{}\" }},\n".format(long.replace('"', '\\"'))
365 manuf_fd
.write("};\n\n")
367 # write the MA-S table
368 manuf_fd
.write("static const manuf_oui36_t global_manuf_oui36_table[] = {\n")
369 keys
= list(oui_d
[MA_S
].keys())
372 short
= oui_d
[MA_S
][oui
][0]
373 if oui_d
[MA_S
][oui
][1]:
374 long = oui_d
[MA_S
][oui
][1]
377 line
= " {{ {{ 0x{}, 0x{}, 0x{}, 0x{}, 0x{} }}, \"{}\", ".format(oui
[0:2], oui
[3:5], oui
[6:8], oui
[9:11], oui
[12:14], short
)
382 line
+= "\"{}\" }},\n".format(long.replace('"', '\\"'))
384 manuf_fd
.write("};\n")
389 print('{:<20}: {}'.format('IEEE ' + db
+ ' added', ieee_d
[db
]['added']))
390 print('{:<20}: {}'.format('Total added', total_added
))
394 print('{:<20}: {}'.format('IEEE ' + db
+ ' total', ieee_d
[db
]['total']))
398 print('{:<20}: {}'.format('IEEE ' + db
+ ' skipped', ieee_d
[db
]['skipped']))
400 if __name__
== '__main__':