// SearchTokenizer: splits a booru-style search query string into a list of SearchToken objects.
//
// Construction: the input is trimmed; a null input is treated as the empty string. Scanning starts at position 0.
// Tokenize(): repeatedly skips whitespace and calls ReadToken() until the end of input; null results are dropped.
//
// Token forms recognised by ReadToken(), in the order they are checked:
//   '{'  '}'  '~'      -> OrGroupStartToken / OrGroupEndToken / OrSeparatorToken (one character consumed each)
//   leading '-'        -> negation prefix, consumed before the rest of the token is read
//   "quoted phrase"    -> read via ReadQuotedTerm(); yields TagToken or NegatedTagToken (never treated as a meta-tag)
//   term containing ':', '>', '<' or '=' at an index greater than 0 -> ParseMetaTag(term, negated)
//   term ending in '*' -> WildcardTagToken(prefix without the '*', negated)
//   anything else      -> TagToken or NegatedTagToken
//   An empty phrase or empty term produces no token (ReadToken returns null).
//
// ParseMetaTag(): splits at the first ':' if the term has one; otherwise it looks for ">=", "<=", '>', '<', '='
// and ".." in that order. The key (text before the split) is lower-cased with ToLowerInvariant(). For the ".."
// case the raw value keeps the ".." itself. When the split character is ':', the value is inspected again:
// a value containing ".." becomes a Range, and a leading ">=", "<=" or '>' selects the matching operator with the
// operator characters removed from the value. If no usable split point is found (index <= 0) the whole term is
// returned as the key with MetaOperator.Equals and an empty value.
//
// NOTE(review): this copy of the file looks extraction-damaged and will not compile as written:
//   * line breaks were collapsed, so each "//" comment swallows the code that follows it on the same physical line;
//   * generic type arguments and XML doc tags appear to have been stripped ("public List Tokenize()",
//     "new List()", bare "/// ///");
//   * the text jumps from "// Less than: key:" straight to "_pos >= _input.Length;", so the remainder of
//     ParseMetaTag, the definitions of ReadQuotedTerm and ReadTerm, the "#region" that pairs with the trailing
//     "#endregion", and the head of the member that is presumably AtEnd are all missing.
// Restore the file from version control rather than reconstructing the missing members by hand.
using System.Text; using Nuuru.Server.Services.Search.Tokens; namespace Nuuru.Server.Services.Search; /// /// Tokenizes a booru-style search query string into tokens. /// public class SearchTokenizer { private readonly string _input; private int _pos; public SearchTokenizer(string input) { _input = input?.Trim() ?? string.Empty; _pos = 0; } /// /// Tokenizes the input string into a list of search tokens. /// public List Tokenize() { var tokens = new List(); while (!AtEnd) { SkipWhitespace(); if (AtEnd) break; var token = ReadToken(); if (token != null) tokens.Add(token); } return tokens; } private SearchToken? ReadToken() { // OR group markers if (Current == '{') { Advance(); return new OrGroupStartToken(); } if (Current == '}') { Advance(); return new OrGroupEndToken(); } if (Current == '~') { Advance(); return new OrSeparatorToken(); } // Check for negation prefix bool negated = Current == '-'; if (negated) Advance(); // Check for quoted phrase if (Current == '"') { var phrase = ReadQuotedTerm(); if (string.IsNullOrEmpty(phrase)) return null; return negated ? new NegatedTagToken(phrase) : new TagToken(phrase); } // Read the term (could be tag or meta-tag) var term = ReadTerm(); if (string.IsNullOrEmpty(term)) return null; // Check for meta-tag (contains :, >, <, or = not at the start) var metaIdx = term.IndexOfAny([':', '>', '<', '=']); if (metaIdx > 0) { return ParseMetaTag(term, negated); } // Check for wildcard if (term.EndsWith('*')) { var prefix = term[..^1]; return new WildcardTagToken(prefix, negated); } // Regular tag return negated ? 
new NegatedTagToken(term) : new TagToken(term); } private MetaTagToken ParseMetaTag(string term, bool negated) { // Meta-tags can be key:value or key>value or key=value or key<=value or key..value int splitIdx = term.IndexOf(':'); MetaOperator defaultOp = MetaOperator.Equals; int skip = 1; if (splitIdx < 0) { // Check for other operators if no colon if (term.Contains(">=")) { splitIdx = term.IndexOf(">="); defaultOp = MetaOperator.GreaterThanOrEqual; skip = 2; } else if (term.Contains("<=")) { splitIdx = term.IndexOf("<="); defaultOp = MetaOperator.LessThanOrEqual; skip = 2; } else if (term.Contains('>')) { splitIdx = term.IndexOf('>'); defaultOp = MetaOperator.GreaterThan; skip = 1; } else if (term.Contains('<')) { splitIdx = term.IndexOf('<'); defaultOp = MetaOperator.LessThan; skip = 1; } else if (term.Contains('=')) { splitIdx = term.IndexOf('='); defaultOp = MetaOperator.Equals; skip = 1; } else if (term.Contains("..")) { splitIdx = term.IndexOf(".."); defaultOp = MetaOperator.Range; skip = 0; } // skip 0 because rawValue includes .. } if (splitIdx <= 0) { // Fallback for safety, though should be caught by caller return new MetaTagToken(term, MetaOperator.Equals, string.Empty, negated); } var key = term[..splitIdx].ToLowerInvariant(); var rawValue = term[(splitIdx + (defaultOp == MetaOperator.Range ? 0 : skip))..]; // If we used a colon, the operator might still be in the value (e.g. 
id:>100) if (term[splitIdx] == ':') { // Range: key:min..max if (rawValue.Contains("..")) { return new MetaTagToken(key, MetaOperator.Range, rawValue, negated); } // Greater than or equal: key:>=value if (rawValue.StartsWith(">=")) { return new MetaTagToken(key, MetaOperator.GreaterThanOrEqual, rawValue[2..], negated); } // Less than or equal: key:<=value if (rawValue.StartsWith("<=")) { return new MetaTagToken(key, MetaOperator.LessThanOrEqual, rawValue[2..], negated); } // Greater than: key:>value if (rawValue.StartsWith('>')) { return new MetaTagToken(key, MetaOperator.GreaterThan, rawValue[1..], negated); } // Less than: key: _pos >= _input.Length; private char Current => AtEnd ? '\0' : _input[_pos]; private void Advance() => _pos++; private void SkipWhitespace() { while (!AtEnd && char.IsWhiteSpace(Current)) Advance(); } #endregion }