dovecot/fts: switch to fts-flatcurve

This switches the full-text search plugin from fts-xapian to
fts-flatcurve, the now preferred indexer still powered by Xapian,
which will be integrated into Dovecot core 2.4.

This sets a sane minimal configuration for the plugin with
international language support.

The plugin options marked as "advanced" in Dovecot's documentation
aren't re-exposed for simplicity. They can nevertheless be overridden
by module consumers by directly setting keys with
`services.dovecot2.pluginSettings.fts_*`.

The `fullTextSearch.maintenance` option is removed as the index is now
incrementally optimised in the background.

GitLab: closes https://gitlab.com/simple-nixos-mailserver/nixos-mailserver/-/issues/239
This commit is contained in:
euxane 2025-01-24 17:36:40 +01:00
parent 433520257a
commit 2ed7a94782
4 changed files with 101 additions and 87 deletions

View File

@ -380,7 +380,21 @@ in
};
fullTextSearch = {
enable = mkEnableOption "Full text search indexing with xapian. This has significant performance and disk space cost.";
enable = mkEnableOption ''
Full text search indexing with Xapian through the fts_flatcurve plugin.
This has significant performance and disk space cost.
'';
memoryLimit = mkOption {
type = types.nullOr types.int;
default = null;
example = 2000;
description = ''
Memory limit for the indexer process, in MiB.
If null, leaves the default (which is rather low),
and if 0, no limit.
'';
};
autoIndex = mkOption {
type = types.bool;
default = true;
@ -406,36 +420,54 @@ in
'';
};
minSize = mkOption {
type = types.ints.between 3 1000;
default = 3;
description = "Minimum size of search terms";
};
memoryLimit = mkOption {
type = types.nullOr types.int;
default = null;
example = 2000;
description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit.";
languages = mkOption {
type = types.nonEmptyListOf types.str;
default = [ "en" ];
example = [ "en" "de" ];
description = ''
A list of languages that the full text search should detect.
At least one language must be specified.
The language listed first is the default and is used when language recognition fails.
See <https://doc.dovecot.org/main/core/plugins/fts.html#fts_languages>.
'';
};
maintenance = {
enable = mkOption {
type = types.bool;
default = true;
description = "Regularly optmize indices, as recommended by upstream.";
};
substringSearch = mkOption {
type = types.bool;
default = false;
description = ''
If enabled, allows substring searches.
See <https://doc.dovecot.org/main/core/plugins/fts_flatcurve.html#fts_flatcurve_substring_search>.
onCalendar = mkOption {
type = types.str;
default = "daily";
description = "When to run the maintenance job. See systemd.time(7) for more information about the format.";
};
Enabling this requires significant additional storage space.
'';
};
randomizedDelaySec = mkOption {
type = types.int;
default = 1000;
description = "Run the maintenance job not exactly at the time specified with `onCalendar`, but plus or minus this many seconds.";
};
headerExcludes = mkOption {
type = types.listOf types.str;
default = [
"Received"
"DKIM-*"
"X-*"
"Comments"
];
description = ''
The list of headers to exclude.
See <https://doc.dovecot.org/main/core/plugins/fts.html#fts_header_excludes>.
'';
};
filters = mkOption {
type = types.listOf types.str;
default = [
"normalizer-icu"
"snowball"
"stopwords"
];
description = ''
The list of filters to apply.
<https://doc.dovecot.org/main/core/plugins/fts.html#filter-configuration>.
'';
};
};
@ -1269,6 +1301,18 @@ in
};
imports = [
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "enable" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "onCalendar" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "randomizedDelaySec" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "minSize" ] ''
This option is not supported by fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maxSize" ] ''
This option is not needed since fts-xapian 1.8.3
'')

View File

@ -4,7 +4,7 @@ Full text search
By default, when your IMAP client searches for an email containing some
text in its *body*, dovecot will read all your email sequentially. This
is very slow and IO intensive. To speed body searches up, it is possible to
*index* emails with a plugin to dovecot, ``fts_xapian``.
*index* emails with a plugin to dovecot, ``fts_flatcurve``.
Enabling full text search
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -59,7 +59,8 @@ Mitigating resources requirements
You can:
* increase the minimum search term size ``mailserver.fullTextSearch.minSize``
* exclude some headers from indexation with ``mailserver.fullTextSearch.headerExcludes``
* disable expensive token normalisation in ``mailserver.fullTextSearch.filters``
* disable automatic indexation for some folders with
``mailserver.fullTextSearch.autoIndexExclude``. Folders can be specified by
name (``"Trash"``), by special use (``"\\Junk"``) or with a wildcard.

View File

@ -26,7 +26,12 @@ let
userdbFile = "${passwdDir}/userdb";
# This file contains the ldap bind password
ldapConfFile = "${passwdDir}/dovecot-ldap.conf.ext";
bool2int = x: if x then "1" else "0";
boolToYesNo = x: if x then "yes" else "no";
listToLine = lib.concatStringsSep " ";
listToMultiAttrs = keyPrefix: attrs: lib.listToAttrs (lib.imap1 (n: x: {
name = "${keyPrefix}${if n==1 then "" else toString n}";
value = x;
}) attrs);
maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs";
maildirUTF8FolderNames = lib.optionalString cfg.useUTF8FolderNames ":UTF-8";
@ -122,10 +127,22 @@ let
dovecotModules = [
pkgs.dovecot_pigeonhole
] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian;
] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot-fts-flatcurve;
# Remove and assume `false` after NixOS 25.05
haveDovecotModulesOption = options.services.dovecot2 ? "modules" && (options.services.dovecot2.modules.visible or true);
ftsPluginSettings = {
fts = "flatcurve";
fts_languages = listToLine cfg.fullTextSearch.languages;
fts_tokenizers = listToLine [ "generic" "email-address" ];
fts_tokenizer_email_address = "maxlen=100"; # default 254 too large for Xapian
fts_flatcurve_substring_search = boolToYesNo cfg.fullTextSearch.substringSearch;
fts_filters = listToLine cfg.fullTextSearch.filters;
fts_header_excludes = listToLine cfg.fullTextSearch.headerExcludes;
fts_autoindex = boolToYesNo cfg.fullTextSearch.autoIndex;
fts_enforced = cfg.fullTextSearch.enforced;
} // (listToMultiAttrs "fts_autoindex_exclude" cfg.fullTextSearch.autoIndexExclude);
in
{
config = with cfg; lib.mkIf enable {
@ -160,14 +177,17 @@ in
sslServerCert = certificatePath;
sslServerKey = keyPath;
enableLmtp = true;
mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ "fts" "fts_xapian" ];
mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [
"fts"
"fts_flatcurve"
];
protocols = lib.optional cfg.enableManageSieve "sieve";
pluginSettings = {
sieve = "file:${cfg.sieveDirectory}/%{user}/scripts;active=${cfg.sieveDirectory}/%{user}/active.sieve";
sieve_default = "file:${cfg.sieveDirectory}/%{user}/default.sieve";
sieve_default_name = "default";
};
} // (lib.optionalAttrs cfg.fullTextSearch.enable ftsPluginSettings);
sieve = {
extensions = [
@ -341,26 +361,11 @@ in
inbox = yes
}
${lib.optionalString cfg.fullTextSearch.enable ''
plugin {
plugin = fts fts_xapian
fts = xapian
fts_xapian = partial=${toString cfg.fullTextSearch.minSize} verbose=${bool2int cfg.debug}
fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"}
${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude}
fts_enforced = ${cfg.fullTextSearch.enforced}
}
service indexer-worker {
${lib.optionalString (cfg.fullTextSearch.memoryLimit != null) ''
vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)}
''}
process_limit = 0
}
''}
lda_mailbox_autosubscribe = yes
lda_mailbox_autocreate = yes
@ -378,29 +383,5 @@ in
};
systemd.services.postfix.restartTriggers = [ genPasswdScript ] ++ (lib.optional cfg.ldap.enable [setPwdInLdapConfFile]);
systemd.services.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable) {
description = "Optimize dovecot indices for fts_xapian";
requisite = [ "dovecot2.service" ];
after = [ "dovecot2.service" ];
startAt = cfg.fullTextSearch.maintenance.onCalendar;
serviceConfig = {
Type = "oneshot";
ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A";
PrivateDevices = true;
PrivateNetwork = true;
ProtectKernelTunables = true;
ProtectKernelModules = true;
ProtectControlGroups = true;
ProtectHome = true;
ProtectSystem = true;
PrivateTmp = true;
};
};
systemd.timers.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable && cfg.fullTextSearch.maintenance.randomizedDelaySec != 0) {
timerConfig = {
RandomizedDelaySec = cfg.fullTextSearch.maintenance.randomizedDelaySec;
};
};
};
}

View File

@ -82,8 +82,6 @@
# special use depends on https://github.com/NixOS/nixpkgs/pull/93201
autoIndexExclude = [ (if (pkgs.lib.versionAtLeast pkgs.lib.version "21") then "\\Junk" else "Junk") ];
enforced = "yes";
# fts-xapian warns when memory is low, which makes the test fail
memoryLimit = 100000;
};
};
};
@ -493,11 +491,9 @@
# should fail because this folder is not indexed
client.fail("search Junk a >&2")
# check that search really goes through the indexer
server.succeed(
"journalctl -u dovecot2 | grep -E 'indexer-worker.* Done indexing .INBOX.' >&2"
)
server.succeed("journalctl -u dovecot2 | grep 'fts-flatcurve(INBOX): Query ' >&2")
# check that Junk is not indexed
server.fail("journalctl -u dovecot2 | grep 'indexer-worker' | grep -i 'JUNK' >&2")
server.fail("journalctl -u dovecot2 | grep 'fts-flatcurve(JUNK): Indexing ' >&2")
with subtest("dmarc reporting"):
server.systemctl("start rspamd-dmarc-reporter.service")
@ -507,14 +503,6 @@
server.fail("journalctl -u postfix | grep -i warning >&2")
server.fail("journalctl -u dovecot2 | grep -v 'imap-login: Debug: SSL error: Connection closed' | grep -i error >&2")
# harmless ? https://dovecot.org/pipermail/dovecot/2020-August/119575.html
server.fail(
"journalctl -u dovecot2 | \
grep -v 'Expunged message reappeared, giving a new UID' | \
grep -v 'FTS Xapian: Box is empty' | \
grep -v 'FTS Xapian: New version of the plugin' | \
grep -vE 'FTS Xapian:.*does not exist. Creating it' | \
grep -vE 'FTS Xapian:.*indexes do not exist. Initializing DB' | \
grep -i warning >&2"
)
server.fail("journalctl -u dovecot2 | grep -v 'Expunged message reappeared, giving a new UID' | grep -i warning >&2")
'';
}