mirror of
https://github.com/SebastianWendel/nixpkgs.git
synced 2024-09-20 04:19:00 +02:00
Merge pull request #279677 from leona-ya/paperless-nltk
nixos/paperless: use nltk_data package as NLTK data source
This commit is contained in:
commit
f38bca1ca5
|
@ -298,6 +298,8 @@ The pre-existing [services.ankisyncd](#opt-services.ankisyncd.enable) has been m
|
|||
- Custom themes and other assets that were previously stored in `custom/public/*` now belong in `custom/public/assets/*`
|
||||
- New instances of Gitea using MySQL now ignore the `[database].CHARSET` config option and always use the `utf8mb4` charset. Existing instances should migrate via the `gitea doctor convert` CLI command.
|
||||
|
||||
- The `services.paperless` module no longer uses the previously downloaded NLTK data stored in `/var/cache/paperless/nltk`. This directory can be removed.
|
||||
|
||||
- The `hardware.pulseaudio` module now sets the permissions of the pulse user's home directory to 755 when running in "systemWide" mode. This fixes [issue 114399](https://github.com/NixOS/nixpkgs/issues/114399).
|
||||
|
||||
- The `btrbk` module now automatically selects and provides required compression
|
||||
|
|
|
@ -6,7 +6,6 @@ let
|
|||
pkg = cfg.package;
|
||||
|
||||
defaultUser = "paperless";
|
||||
nltkDir = "/var/cache/paperless/nltk";
|
||||
defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf";
|
||||
|
||||
# Don't start a redis instance if the user sets a custom redis connection
|
||||
|
@ -17,13 +16,17 @@ let
|
|||
PAPERLESS_DATA_DIR = cfg.dataDir;
|
||||
PAPERLESS_MEDIA_ROOT = cfg.mediaDir;
|
||||
PAPERLESS_CONSUMPTION_DIR = cfg.consumptionDir;
|
||||
PAPERLESS_NLTK_DIR = nltkDir;
|
||||
PAPERLESS_THUMBNAIL_FONT_NAME = defaultFont;
|
||||
GUNICORN_CMD_ARGS = "--bind=${cfg.address}:${toString cfg.port}";
|
||||
} // optionalAttrs (config.time.timeZone != null) {
|
||||
PAPERLESS_TIME_ZONE = config.time.timeZone;
|
||||
} // optionalAttrs enableRedis {
|
||||
PAPERLESS_REDIS = "unix://${redisServer.unixSocket}";
|
||||
} // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) {
|
||||
PAPERLESS_NLTK_DIR = pkgs.symlinkJoin {
|
||||
name = "paperless_ngx_nltk_data";
|
||||
paths = pkg.nltkData;
|
||||
};
|
||||
} // (lib.mapAttrs (_: s:
|
||||
if (lib.isAttrs s || lib.isList s) then builtins.toJSON s
|
||||
else if lib.isBool s then lib.boolToString s
|
||||
|
@ -292,23 +295,6 @@ in
|
|||
};
|
||||
};
|
||||
|
||||
# Download NLTK corpus data
|
||||
systemd.services.paperless-download-nltk-data = {
|
||||
wantedBy = [ "paperless-scheduler.service" ];
|
||||
before = [ "paperless-scheduler.service" ];
|
||||
after = [ "network-online.target" ];
|
||||
wants = [ "network-online.target" ];
|
||||
serviceConfig = defaultServiceConfig // {
|
||||
User = cfg.user;
|
||||
Type = "oneshot";
|
||||
# Enable internet access
|
||||
PrivateNetwork = false;
|
||||
ExecStart = let pythonWithNltk = pkg.python.withPackages (ps: [ ps.nltk ]); in ''
|
||||
${pythonWithNltk}/bin/python -m nltk.downloader -d '${nltkDir}' punkt snowball_data stopwords
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.paperless-consumer = {
|
||||
description = "Paperless document consumer";
|
||||
# Bind to `paperless-scheduler` so that the consumer never runs
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
, xcbuild
|
||||
, pango
|
||||
, pkg-config
|
||||
, nltk-data
|
||||
}:
|
||||
|
||||
let
|
||||
|
@ -293,6 +294,7 @@ python.pkgs.buildPythonApplication rec {
|
|||
|
||||
passthru = {
|
||||
inherit python path frontend;
|
||||
nltkData = with nltk-data; [ punkt snowball_data stopwords ];
|
||||
tests = { inherit (nixosTests) paperless; };
|
||||
};
|
||||
|
||||
|
|
|
@ -48,6 +48,11 @@ lib.makeScope newScope (self: {
|
|||
location = "taggers";
|
||||
hash = "sha256-ilTs4HWPUoHxQb4kWEy3wJ6QsE/98+EQya44gtV2inw=";
|
||||
});
|
||||
snowball_data = makeNltkDataPackage ({
|
||||
pname = "snowball_data";
|
||||
location = "stemmers";
|
||||
hash = "sha256-Y6LERPtaRbCtWmJCvMAd2xH02xdrevZBFNYvP9N4+3s=";
|
||||
});
|
||||
stopwords = makeNltkDataPackage ({
|
||||
pname = "stopwords";
|
||||
location = "corpora";
|
||||
|
|
Loading…
Reference in a new issue