Merge branch 'master-dovecot-fts-flatcurve' into 'master'

dovecot/fts: switch to fts-flatcurve

Closes #239

See merge request simple-nixos-mailserver/nixos-mailserver!361
This commit is contained in:
euxane 2025-03-21 19:49:22 +01:00
commit bc0cdcb6df
6 changed files with 112 additions and 97 deletions

View File

@ -406,36 +406,52 @@ in
'';
};
minSize = mkOption {
type = types.ints.between 3 1000;
default = 3;
description = "Minimum size of search terms";
};
memoryLimit = mkOption {
type = types.nullOr types.int;
default = null;
example = 2000;
description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit.";
languages = mkOption {
type = types.nonEmptyListOf types.str;
default = [ "en" ];
example = [ "en" "de" ];
description = ''
A list of languages that the full text search should detect.
At least one language must be specified.
The language listed first is the default and is used when language recognition fails.
See <https://doc.dovecot.org/main/core/plugins/fts.html#fts_languages>.
'';
};
maintenance = {
enable = mkOption {
type = types.bool;
default = true;
description = "Regularly optmize indices, as recommended by upstream.";
};
substringSearch = mkOption {
type = types.bool;
default = false;
description = ''
If enabled, allows substring searches.
See <https://doc.dovecot.org/main/core/plugins/fts_flatcurve.html#fts_flatcurve_substring_search>.
'';
};
onCalendar = mkOption {
type = types.str;
default = "daily";
description = "When to run the maintenance job. See systemd.time(7) for more information about the format.";
};
headerExcludes = mkOption {
type = types.listOf types.str;
default = [
"Received"
"DKIM-*"
"X-*"
"Comments"
];
description = ''
The list of headers to exclude.
See <https://doc.dovecot.org/main/core/plugins/fts.html#fts_header_excludes>.
'';
};
randomizedDelaySec = mkOption {
type = types.int;
default = 1000;
description = "Run the maintenance job not exactly at the time specified with `onCalendar`, but plus or minus this many seconds.";
};
filters = mkOption {
type = types.listOf types.str;
default = [
"normalizer-icu"
"snowball"
"stopwords"
];
description = ''
The list of filters to apply.
<https://doc.dovecot.org/main/core/plugins/fts.html#filter-configuration>.
'';
};
};
@ -1289,6 +1305,21 @@ in
};
imports = [
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "memoryLimit" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "enable" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "onCalendar" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "randomizedDelaySec" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "minSize" ] ''
This option is not supported by fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maxSize" ] ''
This option is not needed since fts-xapian 1.8.3
'')

View File

@ -4,7 +4,7 @@ Full text search
By default, when your IMAP client searches for an email containing some
text in its *body*, dovecot will read all your email sequentially. This
is very slow and IO intensive. To speed body searches up, it is possible to
*index* emails with a plugin to dovecot, ``fts_xapian``.
*index* emails with a plugin to dovecot, ``fts_flatcurve``.
Enabling full text search
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -50,16 +50,15 @@ recommended to move indices in a different location, such as
Indexation itself is rather resouces intensive, in CPU, and for emails with
large headers, in memory as well. Initial indexation of existing emails can take
hours. If the indexer worker is killed or segfaults during indexation, it can
be that it tried to allocate more memory than allowed. You can increase the memory
limit by eg ``mailserver.fullTextSearch.memoryLimit = 2000`` (in MiB).
hours.
Mitigating resources requirements
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You can:
* increase the minimum search term size ``mailserver.fullTextSearch.minSize``
* exclude some headers from indexation with ``mailserver.fullTextSearch.headerExcludes``
* disable expensive token normalisation in ``mailserver.fullTextSearch.filters``
* disable automatic indexation for some folders with
``mailserver.fullTextSearch.autoIndexExclude``. Folders can be specified by
name (``"Trash"``), by special use (``"\\Junk"``) or with a wildcard.

View File

@ -1,6 +1,21 @@
Release Notes
=============
NixOS 25.05
-----------
- Switch to the more efficient `fts-flatcurve` indexer for full text search
(`merge request <https://gitlab.com/simple-nixos-mailserver/nixos-mailserver/-/merge_requests/361>`__).
This makes use of a new index, which will be automatically re-generated the
next time a folder is searched.
The operation is now quick enough to be performed "just-in-time".
Alternatively, all indices can be immediately re-generated for all users and
folders by running
`doveadm fts rescan -u '*' && doveadm index -u '*' -q '*'`.
The previous index (which is not automatically discarded to allow rollbacks)
can be cleaned up by removing all the `xapian-indexes` directories within
`mailserver.indexDir`.
NixOS 24.11
-----------

18
flake.lock generated
View File

@ -19,11 +19,11 @@
"flake-compat": {
"flake": false,
"locked": {
"lastModified": 1696426674,
"narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
"lastModified": 1733328505,
"narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
"owner": "edolstra",
"repo": "flake-compat",
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
"rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
"type": "github"
},
"original": {
@ -34,11 +34,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1732014248,
"narHash": "sha256-y/MEyuJ5oBWrWAic/14LaIr/u5E0wRVzyYsouYY3W6w=",
"lastModified": 1739214665,
"narHash": "sha256-26L8VAu3/1YRxS8MHgBOyOM8xALdo6N0I04PgorE7UM=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "23e89b7da85c3640bbc2173fe04f4bd114342367",
"rev": "64e75cd44acf21c7933d61d7721e812eac1b5a0a",
"type": "github"
},
"original": {
@ -49,11 +49,11 @@
},
"nixpkgs-24_11": {
"locked": {
"lastModified": 1734083684,
"narHash": "sha256-5fNndbndxSx5d+C/D0p/VF32xDiJCJzyOqorOYW4JEo=",
"lastModified": 1739357830,
"narHash": "sha256-9xim3nJJUFbVbJCz48UP4fGRStVW5nv4VdbimbKxJ3I=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "314e12ba369ccdb9b352a4db26ff419f7c49fa84",
"rev": "0ff09db9d034a04acd4e8908820ba0b410d7a33a",
"type": "github"
},
"original": {

View File

@ -27,6 +27,12 @@ let
# This file contains the ldap bind password
ldapConfFile = "${passwdDir}/dovecot-ldap.conf.ext";
bool2int = x: if x then "1" else "0";
boolToYesNo = x: if x then "yes" else "no";
listToLine = lib.concatStringsSep " ";
listToMultiAttrs = keyPrefix: attrs: lib.listToAttrs (lib.imap1 (n: x: {
name = "${keyPrefix}${if n==1 then "" else toString n}";
value = x;
}) attrs);
maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs";
maildirUTF8FolderNames = lib.optionalString cfg.useUTF8FolderNames ":UTF-8";
@ -145,10 +151,22 @@ let
dovecotModules = [
pkgs.dovecot_pigeonhole
] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian;
] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot-fts-flatcurve;
# Remove and assume `false` after NixOS 25.05
haveDovecotModulesOption = options.services.dovecot2 ? "modules" && (options.services.dovecot2.modules.visible or true);
ftsPluginSettings = {
fts = "flatcurve";
fts_languages = listToLine cfg.fullTextSearch.languages;
fts_tokenizers = listToLine [ "generic" "email-address" ];
fts_tokenizer_email_address = "maxlen=100"; # default 254 too large for Xapian
fts_flatcurve_substring_search = boolToYesNo cfg.fullTextSearch.substringSearch;
fts_filters = listToLine cfg.fullTextSearch.filters;
fts_header_excludes = listToLine cfg.fullTextSearch.headerExcludes;
fts_autoindex = boolToYesNo cfg.fullTextSearch.autoIndex;
fts_enforced = cfg.fullTextSearch.enforced;
} // (listToMultiAttrs "fts_autoindex_exclude" cfg.fullTextSearch.autoIndexExclude);
in
{
config = with cfg; lib.mkIf enable {
@ -183,14 +201,17 @@ in
sslServerCert = certificatePath;
sslServerKey = keyPath;
enableLmtp = true;
mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ "fts" "fts_xapian" ];
mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [
"fts"
"fts_flatcurve"
];
protocols = lib.optional cfg.enableManageSieve "sieve";
pluginSettings = {
sieve = "file:${cfg.sieveDirectory}/%{user}/scripts;active=${cfg.sieveDirectory}/%{user}/active.sieve";
sieve_default = "file:${cfg.sieveDirectory}/%{user}/default.sieve";
sieve_default_name = "default";
};
} // (lib.optionalAttrs cfg.fullTextSearch.enable ftsPluginSettings);
sieve = {
extensions = [
@ -358,27 +379,6 @@ in
inbox = yes
}
${lib.optionalString cfg.fullTextSearch.enable ''
plugin {
plugin = fts fts_xapian
fts = xapian
fts_xapian = partial=${toString cfg.fullTextSearch.minSize} verbose=${bool2int cfg.debug}
fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"}
${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude}
fts_enforced = ${cfg.fullTextSearch.enforced}
}
service indexer-worker {
${lib.optionalString (cfg.fullTextSearch.memoryLimit != null) ''
vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)}
''}
process_limit = 0
}
''}
lda_mailbox_autosubscribe = yes
lda_mailbox_autocreate = yes
'';
@ -395,29 +395,5 @@ in
};
systemd.services.postfix.restartTriggers = [ genPasswdScript ] ++ (lib.optional cfg.ldap.enable [setPwdInLdapConfFile]);
systemd.services.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable) {
description = "Optimize dovecot indices for fts_xapian";
requisite = [ "dovecot2.service" ];
after = [ "dovecot2.service" ];
startAt = cfg.fullTextSearch.maintenance.onCalendar;
serviceConfig = {
Type = "oneshot";
ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A";
PrivateDevices = true;
PrivateNetwork = true;
ProtectKernelTunables = true;
ProtectKernelModules = true;
ProtectControlGroups = true;
ProtectHome = true;
ProtectSystem = true;
PrivateTmp = true;
};
};
systemd.timers.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable && cfg.fullTextSearch.maintenance.randomizedDelaySec != 0) {
timerConfig = {
RandomizedDelaySec = cfg.fullTextSearch.maintenance.randomizedDelaySec;
};
};
};
}

View File

@ -81,8 +81,6 @@ pkgs.nixosTest {
# special use depends on https://github.com/NixOS/nixpkgs/pull/93201
autoIndexExclude = [ (if (pkgs.lib.versionAtLeast pkgs.lib.version "21") then "\\Junk" else "Junk") ];
enforced = "yes";
# fts-xapian warns when memory is low, which makes the test fail
memoryLimit = 100000;
};
};
};
@ -493,11 +491,9 @@ pkgs.nixosTest {
# should fail because this folder is not indexed
client.fail("search Junk a >&2")
# check that search really goes through the indexer
server.succeed(
"journalctl -u dovecot2 | grep -E 'indexer-worker.* Done indexing .INBOX.' >&2"
)
server.succeed("journalctl -u dovecot2 | grep 'fts-flatcurve(INBOX): Query ' >&2")
# check that Junk is not indexed
server.fail("journalctl -u dovecot2 | grep 'indexer-worker' | grep -i 'JUNK' >&2")
server.fail("journalctl -u dovecot2 | grep 'fts-flatcurve(JUNK): Indexing ' >&2")
with subtest("dmarc reporting"):
server.systemctl("start rspamd-dmarc-reporter.service")
@ -507,8 +503,6 @@ pkgs.nixosTest {
server.fail("journalctl -u postfix | grep -i warning >&2")
server.fail("journalctl -u dovecot2 | grep -v 'imap-login: Debug: SSL error: Connection closed' | grep -i error >&2")
# harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html
server.fail(
"journalctl -u dovecot2 |grep -v 'Expunged message reappeared, giving a new UID'| grep -v 'FTS Xapian: Box is empty' | grep -vE 'FTS Xapian:.*does not exist. Creating it' | grep -i warning >&2"
)
server.fail("journalctl -u dovecot2 | grep -v 'Expunged message reappeared, giving a new UID' | grep -i warning >&2")
'';
}