tesseract: Allow specifying a subset of languages.

Especially useful for our OCR-based VM tests, where we only need the
English language. By default the argument is null, so all languages are
included. If a list of language names is passed, only those languages are
enabled, for example:

tesseract.override { enableLanguages = [ "eng" "spa" ]; };

This enables support for only the English and Spanish languages.

Signed-off-by: aszlig <aszlig@redmoonstudios.org>
This commit is contained in:
aszlig 2015-05-22 07:45:59 +02:00
parent 8be00dc71d
commit adb7581459
No known key found for this signature in database
GPG key ID: D0EBD0EC8C2DC961

View file

@ -1,26 +1,31 @@
{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff }:
{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff
, enableLanguages ? null
}:
with stdenv.lib;
let
# Release series; also interpolated into the language data tarball URLs.
majVersion = "3.02";
# Full package version: the release series with ".02" appended ("3.02.02").
version = "${majVersion}.02";
# f LANG SHA256 -> shell command string that unpacks the language data
# tarball for LANG (fetched and checked against SHA256) into $out/share/.
# The returned command has no trailing newline.
f = lang: sha256:
  let
    url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
    archive = fetchurl { inherit url sha256; };
  in "tar xfvz ${archive} -C $out/share/ --strip=1";
# mkLang LANG SHA256 -> a single shell command (no trailing newline) that
# extracts the fetched language data tarball for LANG into $out/share/.
mkLang = lang: sha256:
  let
    langTarball = fetchurl {
      inherit sha256;
      url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
    };
  in
    "tar xfvz ${langTarball} -C $out/share/ --strip=1";
# Shell snippet with one unpack command per bundled language. Each line is
# produced by `f` from a language code and the sha256 of that language's
# data tarball.
extraLanguages = ''
${f "cat" "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9"}
${f "rus" "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709"}
${f "spa" "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l"}
${f "nld" "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy"}
${f "eng" "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461"}
${f "slv" "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr"}
${f "jpn" "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9"}
'';
# Two-argument predicate (attribute name, attribute value) for filterAttrs:
# keep language `name` when no subset was requested (enableLanguages is
# null) or when `name` is listed in enableLanguages. The value is ignored.
wantLang = name: _sha256:
  if enableLanguages == null then true else elem name enableLanguages;
# List of unpack commands, one per enabled language.
extraLanguages =
  let
    # Language code -> sha256 of that language's data tarball.
    languageHashes = {
      cat = "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9";
      rus = "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709";
      spa = "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l";
      nld = "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy";
      eng = "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461";
      slv = "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr";
      jpn = "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9";
    };
    # Only the languages accepted by wantLang.
    enabledHashes = filterAttrs wantLang languageHashes;
  in
    mapAttrsToList mkLang enabledHashes;
in
stdenv.mkDerivation rec {
@ -40,7 +45,7 @@ stdenv.mkDerivation rec {
'LIBLEPT_HEADERSDIR=${leptonica}/include'
'';
# Use the language unpack snippet as the postInstall hook; this form needs
# extraLanguages to be a single string.
postInstall = extraLanguages;
# extraLanguages is a list of complete `tar` commands, none of which ends in
# a newline. Join them with newlines so each runs as its own shell command;
# plain concatStrings would fuse consecutive commands into one malformed
# line ("... --strip=1tar xfvz ...") whenever more than one language is
# enabled. An empty list still yields an empty hook.
postInstall = concatStringsSep "\n" extraLanguages;
meta = {
description = "OCR engine";