Merge branch 'master-dovecot-fts-flatcurve' into 'master'

dovecot/fts: switch to fts-flatcurve

Closes #239

See merge request simple-nixos-mailserver/nixos-mailserver!361
This commit is contained in:
Martin Weinelt 2025-05-19 22:44:15 +00:00
commit dceb60ea7d
5 changed files with 126 additions and 82 deletions

View File

@ -380,7 +380,21 @@ in
};
fullTextSearch = {
enable = mkEnableOption "Full text search indexing with xapian. This has significant performance and disk space cost.";
enable = mkEnableOption ''
Full text search indexing with Xapian through the fts_flatcurve plugin.
This has significant performance and disk space cost.
'';
memoryLimit = mkOption {
type = types.nullOr types.int;
default = null;
example = 2000;
description = ''
Memory limit for the indexer process, in MiB.
If null, leaves the default (which is rather low),
and if 0, no limit.
'';
};
autoIndex = mkOption {
type = types.bool;
default = true;
@ -406,36 +420,54 @@ in
'';
};
minSize = mkOption {
type = types.ints.between 3 1000;
default = 3;
description = "Minimum size of search terms";
};
memoryLimit = mkOption {
type = types.nullOr types.int;
default = null;
example = 2000;
description = "Memory limit for the indexer process, in MiB. If null, leaves the default (which is rather low), and if 0, no limit.";
languages = mkOption {
type = types.nonEmptyListOf types.str;
default = [ "en" ];
example = [ "en" "de" ];
description = ''
A list of languages that the full text search should detect.
At least one language must be specified.
The language listed first is the default and is used when language recognition fails.
See <https://doc.dovecot.org/main/core/plugins/fts.html#fts_languages>.
'';
};
maintenance = {
enable = mkOption {
type = types.bool;
default = true;
description = "Regularly optmize indices, as recommended by upstream.";
};
substringSearch = mkOption {
type = types.bool;
default = false;
description = ''
If enabled, allows substring searches.
See <https://doc.dovecot.org/main/core/plugins/fts_flatcurve.html#fts_flatcurve_substring_search>.
onCalendar = mkOption {
type = types.str;
default = "daily";
description = "When to run the maintenance job. See systemd.time(7) for more information about the format.";
};
Enabling this requires significant additional storage space.
'';
};
randomizedDelaySec = mkOption {
type = types.int;
default = 1000;
description = "Run the maintenance job not exactly at the time specified with `onCalendar`, but plus or minus this many seconds.";
};
headerExcludes = mkOption {
type = types.listOf types.str;
default = [
"Received"
"DKIM-*"
"X-*"
"Comments"
];
description = ''
The list of headers to exclude.
See <https://doc.dovecot.org/main/core/plugins/fts.html#fts_header_excludes>.
'';
};
# Filter names for full text search; the configured list is joined into
# dovecot's `fts_filters` plugin setting.
filters = mkOption {
  type = types.listOf types.str;
  default = [
    "normalizer-icu"
    "snowball"
    "stopwords"
  ];
  description = ''
    The list of filters to apply.
    See <https://doc.dovecot.org/main/core/plugins/fts.html#filter-configuration>.
  '';
};
};
@ -1269,6 +1301,18 @@ in
};
imports = [
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "enable" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "onCalendar" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maintenance" "randomizedDelaySec" ] ''
This option is not needed for fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "minSize" ] ''
This option is not supported by fts-flatcurve
'')
(lib.mkRemovedOptionModule [ "mailserver" "fullTextSearch" "maxSize" ] ''
This option is not needed since fts-xapian 1.8.3
'')

View File

@ -4,7 +4,7 @@ Full text search
By default, when your IMAP client searches for an email containing some
text in its *body*, dovecot will read all your email sequentially. This
is very slow and IO intensive. To speed body searches up, it is possible to
*index* emails with a plugin to dovecot, ``fts_xapian``.
*index* emails with a plugin to dovecot, ``fts_flatcurve``.
Enabling full text search
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -59,7 +59,8 @@ Mitigating resources requirements
You can:
* increase the minimum search term size ``mailserver.fullTextSearch.minSize``
* exclude some headers from indexation with ``mailserver.fullTextSearch.headerExcludes``
* disable expensive token normalisation in ``mailserver.fullTextSearch.filters``
* disable automatic indexation for some folders with
``mailserver.fullTextSearch.autoIndexExclude``. Folders can be specified by
name (``"Trash"``), by special use (``"\\Junk"``) or with a wildcard.

View File

@ -12,6 +12,17 @@ NixOS 25.05
- If you need to revert TCP connections, configure ``mailserver.redis.address`` to reference the value of ``config.services.redis.servers.rspamd.bind``.
- The integration with policyd-spf was removed and SPF handling is now fully based on Rspamd scoring.
(`merge request <https://gitlab.com/simple-nixos-mailserver/nixos-mailserver/-/merge_requests/380>`__)
- Switch to the more efficient ``fts-flatcurve`` indexer for full text search
(`merge request <https://gitlab.com/simple-nixos-mailserver/nixos-mailserver/-/merge_requests/361>`__).
This makes use of a new index, which will be automatically re-generated the
next time a folder is searched.
The operation is now quick enough to be performed "just-in-time".
Alternatively, all indices can be immediately re-generated for all users and
folders by running
``doveadm fts rescan -u '*' && doveadm index -u '*' -q '*'``.
The previous index (which is not automatically discarded to allow rollbacks)
can be cleaned up by removing all the ``xapian-indexes`` directories within
``mailserver.indexDir``.
- Individual domains can now be excluded from DMARC Reporting through ``mailserver.dmarcReporting.excludedDomains``.
(`merge request <https://gitlab.com/simple-nixos-mailserver/nixos-mailserver/-/merge_requests/297>`__)
- Configuring ``mailserver.forwards`` is now possible when the setup relies on LDAP.

View File

@ -26,7 +26,12 @@ let
userdbFile = "${passwdDir}/userdb";
# This file contains the ldap bind password
ldapConfFile = "${passwdDir}/dovecot-ldap.conf.ext";
bool2int = x: if x then "1" else "0";
boolToYesNo = x: if x then "yes" else "no";
listToLine = lib.concatStringsSep " ";
# Turn a list into an attribute set with numbered keys: the first element is
# stored under `keyPrefix` itself, the n-th element (n >= 2) under `keyPrefix`
# followed by n.
# Example: listToMultiAttrs "k" [ "a" "b" ] => { k = "a"; k2 = "b"; }
listToMultiAttrs = keyPrefix: attrs:
  let
    # Positions are 1-based (lib.imap1); only positions after the first get a suffix.
    suffixFor = position: if position == 1 then "" else toString position;
    toPair = position: element: {
      name = "${keyPrefix}${suffixFor position}";
      value = element;
    };
  in
  lib.listToAttrs (lib.imap1 toPair attrs);
maildirLayoutAppendix = lib.optionalString cfg.useFsLayout ":LAYOUT=fs";
maildirUTF8FolderNames = lib.optionalString cfg.useUTF8FolderNames ":UTF-8";
@ -122,10 +127,22 @@ let
dovecotModules = [
pkgs.dovecot_pigeonhole
] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot_fts_xapian;
] ++ lib.optional cfg.fullTextSearch.enable pkgs.dovecot-fts-flatcurve;
# Remove and assume `false` after NixOS 25.05
haveDovecotModulesOption = options.services.dovecot2 ? "modules" && (options.services.dovecot2.modules.visible or true);
# Dovecot plugin settings for the flatcurve full text search backend, built
# from `cfg.fullTextSearch`.
ftsPluginSettings =
  let
    ftsCfg = cfg.fullTextSearch;
    # One key per excluded folder: fts_autoindex_exclude, fts_autoindex_exclude2, ...
    autoIndexExcludes = listToMultiAttrs "fts_autoindex_exclude" ftsCfg.autoIndexExclude;
  in
  {
    fts = "flatcurve";
    fts_enforced = ftsCfg.enforced;
    fts_autoindex = boolToYesNo ftsCfg.autoIndex;
    fts_languages = listToLine ftsCfg.languages;
    fts_filters = listToLine ftsCfg.filters;
    fts_header_excludes = listToLine ftsCfg.headerExcludes;
    fts_flatcurve_substring_search = boolToYesNo ftsCfg.substringSearch;
    fts_tokenizers = listToLine [ "generic" "email-address" ];
    fts_tokenizer_email_address = "maxlen=100"; # default 254 too large for Xapian
  } // autoIndexExcludes;
in
{
config = with cfg; lib.mkIf enable {
@ -136,6 +153,20 @@ in
}
];
# Emit a warning when the "stopwords" filter is configured together with more
# than one full text search language.
warnings =
  let
    ftsCfg = cfg.fullTextSearch;
    hasMultipleLanguages = builtins.length ftsCfg.languages > 1;
    usesStopwords = builtins.elem "stopwords" ftsCfg.filters;
  in
  lib.optional (hasMultipleLanguages && usesStopwords) ''
    Using stopwords in `mailserver.fullTextSearch.filters` with multiple
    languages in `mailserver.fullTextSearch.languages` configured WILL
    cause some searches to fail.
    The recommended solution is to NOT use the stopword filter when
    multiple languages are present in the configuration.
  '';
# for sieve-test. Shelling it in on demand usually doesn't work, as it reads
# the global config and tries to open shared libraries configured in there,
# which are usually not compatible.
@ -160,14 +191,17 @@ in
sslServerCert = certificatePath;
sslServerKey = keyPath;
enableLmtp = true;
mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [ "fts" "fts_xapian" ];
mailPlugins.globally.enable = lib.optionals cfg.fullTextSearch.enable [
"fts"
"fts_flatcurve"
];
protocols = lib.optional cfg.enableManageSieve "sieve";
pluginSettings = {
sieve = "file:${cfg.sieveDirectory}/%{user}/scripts;active=${cfg.sieveDirectory}/%{user}/active.sieve";
sieve_default = "file:${cfg.sieveDirectory}/%{user}/default.sieve";
sieve_default_name = "default";
};
} // (lib.optionalAttrs cfg.fullTextSearch.enable ftsPluginSettings);
sieve = {
extensions = [
@ -341,26 +375,11 @@ in
inbox = yes
}
${lib.optionalString cfg.fullTextSearch.enable ''
plugin {
plugin = fts fts_xapian
fts = xapian
fts_xapian = partial=${toString cfg.fullTextSearch.minSize} verbose=${bool2int cfg.debug}
fts_autoindex = ${if cfg.fullTextSearch.autoIndex then "yes" else "no"}
${lib.strings.concatImapStringsSep "\n" (n: x: "fts_autoindex_exclude${if n==1 then "" else toString n} = ${x}") cfg.fullTextSearch.autoIndexExclude}
fts_enforced = ${cfg.fullTextSearch.enforced}
}
service indexer-worker {
${lib.optionalString (cfg.fullTextSearch.memoryLimit != null) ''
vsz_limit = ${toString (cfg.fullTextSearch.memoryLimit*1024*1024)}
''}
process_limit = 0
}
''}
lda_mailbox_autosubscribe = yes
lda_mailbox_autocreate = yes
@ -378,29 +397,5 @@ in
};
systemd.services.postfix.restartTriggers = [ genPasswdScript ] ++ (lib.optional cfg.ldap.enable [setPwdInLdapConfFile]);
systemd.services.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable) {
description = "Optimize dovecot indices for fts_xapian";
requisite = [ "dovecot2.service" ];
after = [ "dovecot2.service" ];
startAt = cfg.fullTextSearch.maintenance.onCalendar;
serviceConfig = {
Type = "oneshot";
ExecStart = "${pkgs.dovecot}/bin/doveadm fts optimize -A";
PrivateDevices = true;
PrivateNetwork = true;
ProtectKernelTunables = true;
ProtectKernelModules = true;
ProtectControlGroups = true;
ProtectHome = true;
ProtectSystem = true;
PrivateTmp = true;
};
};
systemd.timers.dovecot-fts-xapian-optimize = lib.mkIf (cfg.fullTextSearch.enable && cfg.fullTextSearch.maintenance.enable && cfg.fullTextSearch.maintenance.randomizedDelaySec != 0) {
timerConfig = {
RandomizedDelaySec = cfg.fullTextSearch.maintenance.randomizedDelaySec;
};
};
};
}

View File

@ -82,8 +82,6 @@
# special use depends on https://github.com/NixOS/nixpkgs/pull/93201
autoIndexExclude = [ (if (pkgs.lib.versionAtLeast pkgs.lib.version "21") then "\\Junk" else "Junk") ];
enforced = "yes";
# fts-xapian warns when memory is low, which makes the test fail
memoryLimit = 100000;
};
};
};
@ -493,11 +491,9 @@
# should fail because this folder is not indexed
client.fail("search Junk a >&2")
# check that search really goes through the indexer
server.succeed(
"journalctl -u dovecot2 | grep -E 'indexer-worker.* Done indexing .INBOX.' >&2"
)
server.succeed("journalctl -u dovecot2 | grep 'fts-flatcurve(INBOX): Query ' >&2")
# check that Junk is not indexed
server.fail("journalctl -u dovecot2 | grep 'indexer-worker' | grep -i 'JUNK' >&2")
# -i: match the mailbox name regardless of the case dovecot logs it in, so this
# negative check cannot pass merely because of a casing mismatch
server.fail("journalctl -u dovecot2 | grep -i 'fts-flatcurve(JUNK): Indexing ' >&2")
with subtest("dmarc reporting"):
server.systemctl("start rspamd-dmarc-reporter.service")
@ -510,10 +506,7 @@
server.fail(
"journalctl -u dovecot2 | \
grep -v 'Expunged message reappeared, giving a new UID' | \
grep -v 'FTS Xapian: Box is empty' | \
grep -v 'FTS Xapian: New version of the plugin' | \
grep -vE 'FTS Xapian:.*does not exist. Creating it' | \
grep -vE 'FTS Xapian:.*indexes do not exist. Initializing DB' | \
grep -v 'Time moved forwards' | \
grep -i warning >&2"
)
'';