pkgs/by-name/cl/cld2/package.nix

   1 # Package expression for Compact Language Detector 2, built from a pinned commit.
   2 {
   3   lib,
   4   stdenv,
   5   fetchFromGitHub,
   6   cmake,
   7   fetchpatch,
   8 }:
   9
  10 stdenv.mkDerivation {
  11   pname = "cld2";
  12   # Date-style version: the source below is pinned by commit hash, not by a tag.
  13   version = "unstable-2015-08-21";
  14
  15   src = fetchFromGitHub {
  16     owner = "CLD2Owners";
  17     repo = "cld2";
  18     rev = "b56fa78a2fe44ac2851bae5bf4f4693a0644da7b";
  19     hash = "sha256-YhXs45IbriKWKULguZM4DgfV/Fzr73VHxA1pFTXCyv8=";
  20   };
  21
  22   patches = [
  23     # A single commit fetched from upstream pull request #65. Going by its name
  24     # it adds a CMakeLists.txt, presumably the one the cmake hook below relies
  25     # on -- confirm against the patch contents before dropping either.
  26     (fetchpatch {
  27       name = "add-cmakelists.txt";
  28       url = "https://github.com/CLD2Owners/cld2/pull/65/commits/9cfac02c2ac7802ab7079560b38a474473c45f51.patch";
  29       hash = "sha256-uOjmUk8kMFl+wED44ErXoLRyblhgDwFx9K1Wj65Omh8=";
  30     })
  31   ];
  32
  33   nativeBuildInputs = [ cmake ];
  34
  35   # `lib` is referenced explicitly instead of opening it over the whole set.
  36   meta = {
  37     homepage = "https://github.com/CLD2Owners/cld2";
  38     description = "Compact Language Detector 2";
  39     longDescription = ''
  40       CLD2 probabilistically detects over 80 languages in Unicode UTF-8 text,
  41       either plain text or HTML/XML. Legacy encodings must be converted to valid
  42       UTF-8 by the caller. For mixed-language input, CLD2 returns the top three
  43       languages found and their approximate percentages of the total text bytes
  44       (e.g. 80% English and 20% French out of 1000 bytes of text means about 800
  45       bytes of English and 200 bytes of French). Optionally, it also returns a
  46       vector of text spans with the language of each identified. This may be
  47       useful for applying different spelling-correction dictionaries or
  48       different machine translation requests to each span. The design target is
  49       web pages of at least 200 characters (about two sentences); CLD2 is not
  50       designed to do well on very short text, lists of proper names, part
  51       numbers, etc.
  52     '';
  53     license = lib.licenses.asl20;
  54     maintainers = with lib.maintainers; [ chvp ];
  55     platforms = lib.platforms.all;
  56   };
  57 }