dovecot/fts: switch to fts-flatcurve

This switches the full-text search plugin from fts-xapian to
fts-flatcurve, the now preferred indexer still powered by Xapian,
which will be integrated into Dovecot core 2.4.

This sets a sane minimal configuration for the plugin with
international language support.

The plugin options marked as "advanced" in Dovecot's documentation
aren't re-exposed for simplicity. They can nevertheless be overridden
by module consumers by directly setting keys with
`services.dovecot2.pluginSettings.fts_*`.

The `fullTextSearch.{memoryLimit,maintenance}` options are removed as
they are no longer needed for this plugin, which tops out at 256MB of RAM
usage by default and incrementally optimises its index.

GitLab: closes https://gitlab.com/simple-nixos-mailserver/nixos-mailserver/-/issues/239
This commit is contained in:
euxane 2025-01-24 17:36:40 +01:00
parent 4ba292733d
commit eb15b61e32
4 changed files with 88 additions and 88 deletions

View File

@ -406,36 +406,52 @@ in
'';
};
minSize = mkOption {
type = types.ints.between 3 1000;
default = 3;
description = "Minimum size of search terms";
};
memoryLimit = mkOption {
type = types.nullOr types.int;
default = null;
example = 2000;
description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit.";
languages = mkOption {
type = types.nonEmptyListOf types.str;
default = [ "en" ];
example = [ "en" "de" ];
description = ''
A list of languages that the full text search should detect.
At least one language must be specified.
The language listed first is the default and is used when language recognition fails.
See <https://doc.dovecot.org/main/core/plugins/fts.html#fts_languages>.
'';
};
maintenance = {
enable = mkOption {
type = types.bool;
default = true;
description = "Regularly optmize indices, as recommended by upstream.";
};
substringSearch = mkOption {
type = types.bool;
default = false;
description = ''
If enabled, allows substring searches.
See <https://doc.dovecot.org/main/core/plugins/fts_flatcurve.html#fts_flatcurve_substring_search>.
'';
};
onCalendar = mkOption {
type = types.str;
default = "daily";
description = "When to run the maintenance job. See systemd.time(7) for more information about the format.";
};
headerExcludes = mkOption {
type = types.listOf types.str;
default = [
"Received"
"DKIM-*"
"X-*"
"Comments"
];
description = ''
The list of headers to exclude.
See <https://doc.dovecot.org/main/core/plugins/fts.html#fts_header_excludes>.
'';
};
randomizedDelaySec = mkOption {
type = types.int;
default = 1000;
description = "Run the maintenance job not exactly at the time specified with `onCalendar`, but plus or minus this many seconds.";
};
filters = mkOption {
type = types.listOf types.str;
default = [
"normalizer-icu"
"snowball"
"stopwords"
];
description = ''
The list of filters to apply.
<https://doc.dovecot.org/main/core/plugins/fts.html#filter-configuration>.
'';
};
};
@ -1289,6 +1305,21 @@ in
};
imports = [
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "memoryLimit" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "enable" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "onCalendar" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "randomizedDelaySec" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "minSize" ] ''
This option is not supported by fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maxSize" ] ''
This option is not needed since fts-xapian 1.8.3
'')

View File

@ -4,7 +4,7 @@ Full text search
By default, when your IMAP client searches for an email containing some
text in its *body*, dovecot will read all your email sequentially. This
is very slow and IO intensive. To speed body searches up, it is possible to
*index* emails with a plugin to dovecot, ``fts_xapian``.
*index* emails with a plugin to dovecot, ``fts_flatcurve``.
Enabling full text search
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -50,16 +50,15 @@ recommended to move indices in a different location, such as
Indexation itself is rather resource-intensive, in CPU, and for emails with
large headers, in memory as well. Initial indexation of existing emails can take
hours. If the indexer worker is killed or segfaults during indexation, it can
be that it tried to allocate more memory than allowed. You can increase the memory
limit by eg ``mailserver.fullTextSearch.memoryLimit = 2000`` (in MiB).
hours.
Mitigating resources requirements
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You can:
* increase the minimum search term size ``mailserver.fullTextSearch.minSize``
* exclude some headers from indexation with ``mailserver.fullTextSearch.headerExcludes``
* disable expensive token normalisation in ``mailserver.fullTextSearch.filters``
* disable automatic indexation for some folders with
``mailserver.fullTextSearch.autoIndexExclude``. Folders can be specified by
name (``"Trash"``), by special use (``"\\Junk"``) or with a wildcard.

View File

@ -27,6 +27,12 @@ let
# This file contains the ldap bind password
ldapConfFile = "${passwdDir}/dovecot-ldap.conf.ext";
bool2int = x: if x then "1" else "0";
boolToYesNo = x: if x then "yes" else "no";
listToLine = lib.concatStringsSep " ";
listToMultiAttrs = keyPrefix: attrs: lib.listToAttrs (lib.imap1 (n: x: {
name = "${keyPrefix}${if n==1 then "" else toString n}";
value = x;
}) attrs);
maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs";
maildirUTF8FolderNames = lib.optionalString cfg.useUTF8FolderNames ":UTF-8";
@ -145,10 +151,22 @@ let
dovecotModules = [
pkgs.dovecot_pigeonhole
] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian;
] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot-fts-flatcurve;
# Remove and assume `false` after NixOS 25.05
haveDovecotModulesOption = options.services.dovecot2 ? "modules" && (options.services.dovecot2.modules.visible or true);
ftsPluginSettings = {
fts = "flatcurve";
fts_languages = listToLine cfg.fullTextSearch.languages;
fts_tokenizers = listToLine [ "generic" "email-address" ];
fts_tokenizer_email_address = "maxlen=100"; # default 254 too large for Xapian
fts_flatcurve_substring_search = boolToYesNo cfg.fullTextSearch.substringSearch;
fts_filters = listToLine cfg.fullTextSearch.filters;
fts_header_excludes = listToLine cfg.fullTextSearch.headerExcludes;
fts_autoindex = boolToYesNo cfg.fullTextSearch.autoIndex;
fts_enforced = cfg.fullTextSearch.enforced;
} // (listToMultiAttrs "fts_autoindex_exclude" cfg.fullTextSearch.autoIndexExclude);
in
{
config = with cfg; lib.mkIf enable {
@ -183,14 +201,17 @@ in
sslServerCert = certificatePath;
sslServerKey = keyPath;
enableLmtp = true;
mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ "fts" "fts_xapian" ];
mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [
"fts"
"fts_flatcurve"
];
protocols = lib.optional cfg.enableManageSieve "sieve";
pluginSettings = {
sieve = "file:${cfg.sieveDirectory}/%{user}/scripts;active=${cfg.sieveDirectory}/%{user}/active.sieve";
sieve_default = "file:${cfg.sieveDirectory}/%{user}/default.sieve";
sieve_default_name = "default";
};
} // (lib.optionalAttrs cfg.fullTextSearch.enable ftsPluginSettings);
sieve = {
extensions = [
@ -358,27 +379,6 @@ in
inbox = yes
}
${lib.optionalString cfg.fullTextSearch.enable ''
plugin {
plugin = fts fts_xapian
fts = xapian
fts_xapian = partial=${toString cfg.fullTextSearch.minSize} verbose=${bool2int cfg.debug}
fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"}
${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude}
fts_enforced = ${cfg.fullTextSearch.enforced}
}
service indexer-worker {
${lib.optionalString (cfg.fullTextSearch.memoryLimit != null) ''
vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)}
''}
process_limit = 0
}
''}
lda_mailbox_autosubscribe = yes
lda_mailbox_autocreate = yes
'';
@ -395,29 +395,5 @@ in
};
systemd.services.postfix.restartTriggers = [ genPasswdScript ] ++ (lib.optional cfg.ldap.enable [setPwdInLdapConfFile]);
systemd.services.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable) {
description = "Optimize dovecot indices for fts_xapian";
requisite = [ "dovecot2.service" ];
after = [ "dovecot2.service" ];
startAt = cfg.fullTextSearch.maintenance.onCalendar;
serviceConfig = {
Type = "oneshot";
ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A";
PrivateDevices = true;
PrivateNetwork = true;
ProtectKernelTunables = true;
ProtectKernelModules = true;
ProtectControlGroups = true;
ProtectHome = true;
ProtectSystem = true;
PrivateTmp = true;
};
};
systemd.timers.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable && cfg.fullTextSearch.maintenance.randomizedDelaySec != 0) {
timerConfig = {
RandomizedDelaySec = cfg.fullTextSearch.maintenance.randomizedDelaySec;
};
};
};
}

View File

@ -81,8 +81,6 @@ pkgs.nixosTest {
# special use depends on https://github.com/NixOS/nixpkgs/pull/93201
autoIndexExclude = [ (if (pkgs.lib.versionAtLeast pkgs.lib.version "21") then "\\Junk" else "Junk") ];
enforced = "yes";
# fts-xapian warns when memory is low, which makes the test fail
memoryLimit = 100000;
};
};
};
@ -493,11 +491,9 @@ pkgs.nixosTest {
# should fail because this folder is not indexed
client.fail("search Junk a >&2")
# check that search really goes through the indexer
server.succeed(
"journalctl -u dovecot2 | grep -E 'indexer-worker.* Done indexing .INBOX.' >&2"
)
server.succeed("journalctl -u dovecot2 | grep 'fts-flatcurve(INBOX): Query ' >&2")
# check that Junk is not indexed
server.fail("journalctl -u dovecot2 | grep 'indexer-worker' | grep -i 'JUNK' >&2")
server.fail("journalctl -u dovecot2 | grep 'fts-flatcurve(JUNK): Indexing ' >&2")
with subtest("dmarc reporting"):
server.systemctl("start rspamd-dmarc-reporter.service")
@ -507,8 +503,6 @@ pkgs.nixosTest {
server.fail("journalctl -u postfix | grep -i warning >&2")
server.fail("journalctl -u dovecot2 | grep -v 'imap-login: Debug: SSL error: Connection closed' | grep -i error >&2")
# harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html
server.fail(
"journalctl -u dovecot2 |grep -v 'Expunged message reappeared, giving a new UID'| grep -v 'FTS Xapian: Box is empty' | grep -vE 'FTS Xapian:.*does not exist. Creating it' | grep -i warning >&2"
)
server.fail("journalctl -u dovecot2 | grep -v 'Expunged message reappeared, giving a new UID' | grep -i warning >&2")
'';
}