tools/make-usb.py

   1 #!/usr/bin/env python3
   2 #
   3 # make-usb - Creates a file containing vendor and product ids.
   4 # It uses the databases from
   5 # - The USB ID Repository: https://usb-ids.gowdy.us (http://www.linux-usb.org), mirrored at Sourceforge
   6 # - libgphoto2 from gPhoto: https://github.com/gphoto/libgphoto2 (http://gphoto.org), available at GitHub
   7 # to create our file epan/dissectors/usb.c
   8
   9 import re
  10 import sys
  11 import urllib.request, urllib.error, urllib.parse
  12
  13 MODE_IDLE           = 0
  14 MODE_VENDOR_PRODUCT = 1
  15 MIN_VENDORS = 3400 # 3409 as of 2020-11-15
  16 MIN_PRODUCTS = 20000 # 20361 as of 2020-11-15
  17
  18 mode = MODE_IDLE
  19
  20 req_headers = { 'User-Agent': 'Wireshark make-usb' }
  21 req = urllib.request.Request('https://sourceforge.net/p/linux-usb/repo/HEAD/tree/trunk/htdocs/usb.ids?format=raw', headers=req_headers)
  22 response = urllib.request.urlopen(req)
  23 lines = response.read().decode('UTF-8', 'replace').splitlines()
  24
  25 vendors  = dict()
  26 products = dict()
  27 vendors_str="static const value_string usb_vendors_vals[] = {\n"
  28 products_str="static const value_string usb_products_vals[] = {\n"
  29
  30 # Escape backslashes, quotes, control characters and non-ASCII characters.
  31 escapes = {}
  32 for i in range(256):
  33     if i in b'\\"':
  34         escapes[i] = '\\%c' % i
  35     elif i in range(0x20, 0x80) or i in b'\t':
  36         escapes[i] = chr(i)
  37     else:
  38         escapes[i] = '\\%03o' % i
  39
  40 for utf8line in lines:
  41     # Convert single backslashes to double (escaped) backslashes, escape quotes, etc.
  42     utf8line = utf8line.rstrip()
  43     utf8line = re.sub(r"\?+", "?", utf8line)
  44     line = ''.join(escapes[byte] for byte in utf8line.encode('utf8'))
  45
  46     if line == "# Vendors, devices and interfaces. Please keep sorted.":
  47         mode = MODE_VENDOR_PRODUCT
  48         continue
  49     elif line == "# List of known device classes, subclasses and protocols":
  50         mode = MODE_IDLE
  51         continue
  52
  53     if mode == MODE_VENDOR_PRODUCT:
  54         if re.match("^[0-9a-f]{4}", line):
  55             last_vendor=line[:4]
  56             vendors[last_vendor] = line[4:].strip()
  57         elif re.match("^\t[0-9a-f]{4}", line):
  58             line = line.strip()
  59             product = "%s%s"%(last_vendor, line[:4])
  60             products[product] = line[4:].strip()
  61
  62 req = urllib.request.Request('https://raw.githubusercontent.com/gphoto/libgphoto2/master/camlibs/ptp2/library.c', headers=req_headers)
  63 response = urllib.request.urlopen(req)
  64 lines = response.read().decode('UTF-8', 'replace').splitlines()
  65
  66 mode = MODE_IDLE
  67
  68 for line in lines:
  69     if mode == MODE_IDLE and re.match(r".*\bmodels\[\]", line):
  70         mode = MODE_VENDOR_PRODUCT
  71         continue
  72
  73     if mode == MODE_VENDOR_PRODUCT and re.match(r"};", line):
  74         mode = MODE_IDLE
  75
  76     if mode == MODE_IDLE:
  77         continue
  78
  79     m = re.match(r"\s*{\"(.*):(.*)\",\s*0x([0-9a-fA-F]{4}),\s*0x([0-9a-fA-F]{4}),.*},", line)
  80     if m is not None:
  81         manuf = m.group(1).strip()
  82         model = re.sub(r"\(.*\)", "", m.group(2)).strip()
  83         product = m.group(3) + m.group(4)
  84         products[product] = ' '.join((manuf, model))
  85
  86 req = urllib.request.Request('https://raw.githubusercontent.com/gphoto/libgphoto2/master/camlibs/ptp2/music-players.h', headers=req_headers)
  87 response = urllib.request.urlopen(req)
  88 lines = response.read().decode('UTF-8', 'replace').splitlines()
  89
  90 for line in lines:
  91     m = re.match(r"\s*{\s*\"(.*)\",\s*0x([0-9a-fA-F]{4}),\s*\"(.*)\",\s*0x([0-9a-fA-F]{4}),", line)
  92     if m is not None:
  93         manuf = m.group(1).strip()
  94         model = m.group(3).strip()
  95         product = m.group(2) + m.group(4)
  96         products[product] = ' '.join((manuf, model))
  97
  98
  99 if (len(vendors) < MIN_VENDORS):
 100     sys.stderr.write("Not enough vendors: %d\n" % len(vendors))
 101     sys.exit(1)
 102
 103 if (len(products) < MIN_PRODUCTS):
 104     sys.stderr.write("Not enough products: %d\n" % len(products))
 105     sys.exit(1)
 106
 107 vendors = {k.lower(): v for k, v in vendors.items()}
 108 for v in sorted(vendors):
 109     vendors_str += "    { 0x%s, \"%s\" },\n"%(v,vendors[v])
 110
 111 vendors_str += """    { 0, NULL }\n};
 112 value_string_ext ext_usb_vendors_vals = VALUE_STRING_EXT_INIT(usb_vendors_vals);
 113 """
 114
 115 products = {k.lower(): v for k, v in products.items()}
 116 for p in sorted(products):
 117     products_str += "    { 0x%s, \"%s\" },\n"%(p,products[p])
 118
 119 products_str += """    { 0, NULL }\n};
 120 value_string_ext ext_usb_products_vals = VALUE_STRING_EXT_INIT(usb_products_vals);
 121 """
 122
 123 header="""/* usb.c
 124  * USB vendor id and product ids
 125  * This file was generated by running python ./tools/make-usb.py
 126  * Don't change it directly.
 127  *
 128  * Copyright 2012, Michal Labedzki for Tieto Corporation
 129  *
 130  * Other values imported from libghoto2/camlibs/ptp2/library.c, music-players.h
 131  *
 132  * Copyright (C) 2001-2005 Mariusz Woloszyn <emsi@ipartners.pl>
 133  * Copyright (C) 2003-2013 Marcus Meissner <marcus@jet.franken.de>
 134  * Copyright (C) 2005 Hubert Figuiere <hfiguiere@teaser.fr>
 135  * Copyright (C) 2009 Axel Waggershauser <awagger@web.de>
 136  * Copyright (C) 2005-2007 Richard A. Low <richard@wentnet.com>
 137  * Copyright (C) 2005-2012 Linus Walleij <triad@df.lth.se>
 138  * Copyright (C) 2007 Ted Bullock
 139  * Copyright (C) 2012 Sony Mobile Communications AB
 140  *
 141  * Wireshark - Network traffic analyzer
 142  * By Gerald Combs <gerald@wireshark.org>
 143  * Copyright 1998 Gerald Combs
 144  *
 145  * SPDX-License-Identifier: GPL-2.0-or-later
 146  */
 147
 148 /*
 149  * XXX We should probably parse a USB ID file at program start instead
 150  * of generating this file.
 151  */
 152
 153 #include "config.h"
 154 #include <epan/packet.h>
 155 """
 156
 157 f = open('epan/dissectors/usb.c', 'w')
 158 f.write(header)
 159 f.write("\n")
 160 f.write(vendors_str)
 161 f.write("\n\n")
 162 f.write(products_str)
 163 f.write("\n")
 164 f.close()
 165
 166 print("Success!")