using System.Text;
using Nuuru.Server.Services.Search.Tokens;
namespace Nuuru.Server.Services.Search;
/// <summary>
/// Tokenizes a booru-style search query string into tokens.
/// </summary>
public class SearchTokenizer
{
// Query text being scanned; normalized once by the constructor.
private readonly string _input;

// Index into _input of the character the tokenizer is currently looking at.
private int _pos;

/// <summary>
/// Creates a tokenizer over <paramref name="input"/>.
/// </summary>
/// <param name="input">
/// The raw query string. A null value is treated as an empty query, and
/// leading/trailing whitespace is trimmed before scanning.
/// </param>
public SearchTokenizer(string input)
{
    _pos = 0;
    _input = input is null ? string.Empty : input.Trim();
}
/// <summary>
/// Tokenizes the input string into a list of search tokens.
/// </summary>
/// <returns>
/// The tokens in the order they were read from the input. Reads for which
/// <c>ReadToken</c> returns <c>null</c> contribute nothing to the list.
/// </returns>
public List<SearchToken> Tokenize()
{
    var tokens = new List<SearchToken>();
    while (!AtEnd)
    {
        SkipWhitespace();
        if (AtEnd)
        {
            break;
        }

        int start = _pos;
        var token = ReadToken();
        if (token is not null)
        {
            tokens.Add(token);
        }

        // Forward-progress guard: if ReadToken consumed nothing, the next
        // iteration would see exactly the same state and the loop would never
        // end. Skip the offending character so scanning can continue.
        if (_pos == start)
        {
            Advance();
        }
    }
    return tokens;
}
/// <summary>
/// Reads a single token starting at the current position.
/// </summary>
/// <returns>
/// An OR-group start/end/separator token, a plain or negated tag token, a
/// wildcard tag token, or a meta-tag token; <c>null</c> when the quoted phrase
/// or term that was read is null or empty.
/// </returns>
private SearchToken? ReadToken()
{
    // Single-character structural tokens: '{' and '}' delimit an OR group,
    // '~' yields an OR separator.
    switch (Current)
    {
        case '{':
            Advance();
            return new OrGroupStartToken();
        case '}':
            Advance();
            return new OrGroupEndToken();
        case '~':
            Advance();
            return new OrSeparatorToken();
    }

    // An optional leading '-' marks whatever follows as negated.
    bool isNegated = false;
    if (Current == '-')
    {
        isNegated = true;
        Advance();
    }

    // A quoted phrase always becomes a plain (or negated) tag; it is never
    // inspected for meta-tag operators or a trailing wildcard.
    if (Current == '"')
    {
        var quoted = ReadQuotedTerm();
        if (string.IsNullOrEmpty(quoted))
        {
            return null;
        }

        if (isNegated)
        {
            return new NegatedTagToken(quoted);
        }

        return new TagToken(quoted);
    }

    // Unquoted term: may turn out to be a meta-tag, a wildcard, or a plain tag.
    var word = ReadTerm();
    if (string.IsNullOrEmpty(word))
    {
        return null;
    }

    // Meta-tag: the first ':', '>', '<' or '=' in the term is not at index 0.
    bool looksLikeMetaTag = word.IndexOfAny([':', '>', '<', '=']) > 0;
    if (looksLikeMetaTag)
    {
        return ParseMetaTag(word, isNegated);
    }

    // Trailing '*' means prefix match; the '*' itself is dropped.
    if (word.EndsWith('*'))
    {
        return new WildcardTagToken(word[..^1], isNegated);
    }

    // Anything else is a regular tag.
    if (isNegated)
    {
        return new NegatedTagToken(word);
    }

    return new TagToken(word);
}
private MetaTagToken ParseMetaTag(string term, bool negated)
{
// Meta-tags can be key:value or key>value or key=value or key<=value or key..value
int splitIdx = term.IndexOf(':');
MetaOperator defaultOp = MetaOperator.Equals;
int skip = 1;
if (splitIdx < 0)
{
// Check for other operators if no colon
if (term.Contains(">=")) { splitIdx = term.IndexOf(">="); defaultOp = MetaOperator.GreaterThanOrEqual; skip = 2; }
else if (term.Contains("<=")) { splitIdx = term.IndexOf("<="); defaultOp = MetaOperator.LessThanOrEqual; skip = 2; }
else if (term.Contains('>')) { splitIdx = term.IndexOf('>'); defaultOp = MetaOperator.GreaterThan; skip = 1; }
else if (term.Contains('<')) { splitIdx = term.IndexOf('<'); defaultOp = MetaOperator.LessThan; skip = 1; }
else if (term.Contains('=')) { splitIdx = term.IndexOf('='); defaultOp = MetaOperator.Equals; skip = 1; }
else if (term.Contains("..")) { splitIdx = term.IndexOf(".."); defaultOp = MetaOperator.Range; skip = 0; } // skip 0 because rawValue includes ..
}
if (splitIdx <= 0)
{
// Fallback for safety, though should be caught by caller
return new MetaTagToken(term, MetaOperator.Equals, string.Empty, negated);
}
var key = term[..splitIdx].ToLowerInvariant();
var rawValue = term[(splitIdx + (defaultOp == MetaOperator.Range ? 0 : skip))..];
// If we used a colon, the operator might still be in the value (e.g. id:>100)
if (term[splitIdx] == ':')
{
// Range: key:min..max
if (rawValue.Contains(".."))
{
return new MetaTagToken(key, MetaOperator.Range, rawValue, negated);
}
// Greater than or equal: key:>=value
if (rawValue.StartsWith(">="))
{
return new MetaTagToken(key, MetaOperator.GreaterThanOrEqual, rawValue[2..], negated);
}
// Less than or equal: key:<=value
if (rawValue.StartsWith("<="))
{
return new MetaTagToken(key, MetaOperator.LessThanOrEqual, rawValue[2..], negated);
}
// Greater than: key:>value
if (rawValue.StartsWith('>'))
{
return new MetaTagToken(key, MetaOperator.GreaterThan, rawValue[1..], negated);
}
// Less than: key: _pos >= _input.Length;
/// <summary>
/// The character at the current position, or <c>'\0'</c> when the end of the
/// input has been reached.
/// </summary>
private char Current
{
    get
    {
        if (AtEnd)
        {
            return '\0';
        }

        return _input[_pos];
    }
}
/// <summary>
/// Moves the current position forward by one character. No bounds check is
/// performed here.
/// </summary>
private void Advance()
{
    _pos += 1;
}
/// <summary>
/// Advances past any run of whitespace characters at the current position,
/// stopping at the first non-whitespace character or at the end of input.
/// </summary>
private void SkipWhitespace()
{
    for (; !AtEnd; Advance())
    {
        if (!char.IsWhiteSpace(Current))
        {
            return;
        }
    }
}
#endregion
}