/*
 * Logserver
 * Copyright (C) 2017-2025 Joel Reardon
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef __TOKENIZER__H__
#define __TOKENIZER__H__

#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstring>
#include <chrono>
#include <list>
#include <map>
#include <regex>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <thread>
#include <vector>

using namespace std;

class Tokenizer {
public:
	/* Appends to *out every maximal run of printable-or-whitespace
	 * characters of s that is at least min characters long. The runs
	 * are views into s, so the storage behind s must outlive *out. */
	static void printable_segments(const string_view& s,
				       size_t min,
				       vector<string_view>* out) {
		assert(out);
		// The <cctype> classifiers are undefined for negative values,
		// so every byte is routed through unsigned char first.
		const auto is_text = [](char c) {
			const unsigned char uc = static_cast<unsigned char>(c);
			return isprint(uc) || isspace(uc);
		};
		size_t pos = 0;
		while (pos < s.length()) {
			// skip the gap of non-text bytes
			while (pos < s.length() && !is_text(s[pos])) ++pos;
			if (pos == s.length()) break;
			const size_t start = pos;
			while (pos < s.length() && is_text(s[pos])) ++pos;
			if (pos - start >= min)
				out->emplace_back(s.substr(start, pos - start));
		}
	}

	/* Returns the token of s that starts at pos and runs up to (but not
	 * including) the first character after pos that appears in end. The
	 * character at pos itself is never treated as a terminator. When no
	 * terminator follows, the rest of s is returned. Returns an empty
	 * view when pos is at or past the end of s. */
	static string_view get_token(const string_view& s,
				     size_t pos,
				     const string& end) {
		// nothing to tokenize; also keeps pos + 1 from running away
		if (pos >= s.length()) return string_view();
		const size_t stop = s.find_first_of(end, pos + 1);
		if (stop == string_view::npos) return s.substr(pos);
		return s.substr(pos, stop - pos);
	}

	/* Convenience overload: pairs up brackets in s without giving quoted
	 * regions any special treatment (mind_quote is false). */
	static bool match_pairs(const string_view& s,
				vector<string_view>* components,
				vector<size_t>* matchings) {
		const bool mind_quote = false;
		return match_pairs(s, components, matchings, mind_quote);
	}

	/* Pairs up '{'/'}' and '['/']' in s.
	 *
	 * On return both output vectors have one entry per character of s.
	 * matchings holds, for every bracket, the index of its partner and
	 * string::npos everywhere else. components holds, at the index of
	 * each opening bracket, the view from that bracket through its
	 * partner, and an empty view everywhere else.
	 *
	 * When mind_quote is set, characters that annotate_quote does not
	 * mark as unquoted ('_') are skipped.
	 *
	 * Throws std::string on a stray closer, a mismatched pair or an
	 * unclosed opener; returns true otherwise. */
	static bool match_pairs(const string_view& s,
				vector<string_view>* components,
				vector<size_t>* matchings,
				bool mind_quote) {
		assert(matchings);
		assert(components);
		// assign (not resize) so that entries left over from an earlier
		// call on the same vectors cannot leak into this result
		matchings->assign(s.length(), string::npos);
		components->assign(s.length(), string_view());
		vector<pair<size_t, char>> open;
		string annotated;
		if (mind_quote) annotate_quote(s, &annotated);
		for (size_t i = 0; i < s.length(); ++i) {
			if (mind_quote && annotated[i] != '_') continue;
			const char c = s[i];
			if (c == '{' || c == '[') {
				open.emplace_back(i, c);
				continue;
			}
			if (c != '}' && c != ']') continue;
			if (open.empty()) throw string("parse error: empty stack");
			const char wanted = (open.back().second == '{') ? '}' : ']';
			if (c != wanted) throw string("parse error mismatch {}[]");
			const size_t start = open.back().first;
			(*matchings)[i] = start;
			(*matchings)[start] = i;
			(*components)[start] = s.substr(start, i - start + 1);
			open.pop_back();
		}
		if (!open.empty()) throw string("parse error: end with stack");
		return true;
	}

	/* Returns the longest string that every element of vals starts with;
	 * empty when vals is empty or the elements share no prefix. */
	static string longest_prefix(const vector<string>& vals) {
		if (vals.empty()) return "";
		const string& first = vals.front();
		// shrink the candidate length against each remaining element
		size_t common = first.length();
		for (size_t k = 1; k < vals.size(); ++k) {
			const string& other = vals[k];
			size_t n = 0;
			while (n < common && n < other.length() &&
			       other[n] == first[n]) ++n;
			common = n;
		}
		return first.substr(0, common);
	}

	/* Writes into *annotated one marker per character of data:
	 *   '_'   the character is outside any quoted region
	 *   '\''  the character is inside a quoted region
	 *   '"'   the character is the quote that opens or closes a region
	 * Either ' or " opens a region, and only the same character closes
	 * it. A backslash and the character after it both get the marker of
	 * the region they sit in, so an escaped quote never opens or closes
	 * a region. Any previous content of *annotated is discarded. */
	static void annotate_quote(const string_view& data, string* annotated) {
		assert(annotated);
		const char marker[2] = {'_', '\''};
		// 0 while unquoted, otherwise the quote character that opened
		// the current region
		int in_quote = 0;
		// start from a clean slate so stale content cannot bleed through
		annotated->assign(data.length(), '\0');
		size_t i = 0;
		while (i < data.length()) {
			const char c = data[i];
			if (c == '\\') {
				(*annotated)[i] = marker[!!in_quote];
				// a trailing backslash has no character after it
				if (i + 1 < data.length())
					(*annotated)[i + 1] = marker[!!in_quote];
				i += 2;
				continue;
			}
			const bool is_quote = (c == '\"' || c == '\'');
			if (is_quote && (!in_quote || in_quote == c)) {
				(*annotated)[i] = '\"';
				in_quote = in_quote ? 0 : c;
				++i;
				continue;
			}
			(*annotated)[i] = marker[!!in_quote];
			++i;
		}
	}

	/* Returns data with every character that annotate_quote classifies
	 * as a quote delimiter dropped; quoted and unquoted content is kept
	 * as-is. */
	static string collapse_quote(const string& data) {
		string marks;
		annotate_quote(data, &marks);

		string kept;
		kept.reserve(data.length());
		for (size_t idx = 0; idx < data.length(); ++idx) {
			const char m = marks[idx];
			if (m == '_' || m == '\'') kept += data[idx];
		}
		return kept;
	}

	/* Removes backslash escaping from a string. data is the string to
	 * unescape; escapable lists the extra symbols that may follow a
	 * backslash and are then emitted literally. The sequences \0 \a \b
	 * \n \r \t \v and \\ are always decoded. A backslash followed by any
	 * other symbol, or a backslash as the last symbol, makes the input
	 * invalid, in which case the original string is returned unchanged. */
	static string backslash_unescape(const string& data,
					 const string& escapable) {
		string ret;
		ret.reserve(data.length());

		size_t i = 0;
		while (i < data.length()) {
			const char cur = data[i++];
			if (cur != '\\') {
				ret += cur;
				continue;
			}
			// ends in backslash, not escaped
			if (i == data.length()) return data;

			const char c = data[i++];
			switch (c) {
			case '0': ret += '\0'; break;
			case 'a': ret += '\a'; break;
			case 'b': ret += '\b'; break;
			case 'n': ret += '\n'; break;
			case 'r': ret += '\r'; break;
			case 't': ret += '\t'; break;
			case 'v': ret += '\v'; break;
			case '\\': ret += '\\'; break;
			default:
				// only caller-approved symbols may be escaped
				if (escapable.find(c) == string::npos)
					return data;
				ret += c;
				break;
			}
		}
		return ret;
	}

	/* Returns data with NUL, \a, \b, \n, \r, \t, \v, backslash and both
	 * quote characters replaced by their two-character backslash
	 * escapes; every other character is copied through. */
	static string backslash_escape(const string& data) {
		string ret;
		ret.reserve(2 * data.length());

		for (const char ch : data) {
			char code = 0;
			switch (ch) {
			case '\0': code = '0'; break;
			case '\a': code = 'a'; break;
			case '\b': code = 'b'; break;
			case '\n': code = 'n'; break;
			case '\r': code = 'r'; break;
			case '\t': code = 't'; break;
			case '\v': code = 'v'; break;
			case '\\': code = '\\'; break;
			case '\"': code = '\"'; break;
			case '\'': code = '\''; break;
			default:
				// ordinary character: no escape needed
				ret += ch;
				continue;
			}
			ret += '\\';
			ret += code;
		}
		return ret;
	}

	/* Reads from input, starting at *pos, up to (not including) the next
	 * unescaped end_c and returns the decoded text. A leading end_c at
	 * *pos is skipped first. Inside the text \0 \a \b \n \r \t \v and \\
	 * are decoded; a backslash before any other character yields that
	 * character. On return *pos is the index of the terminating end_c,
	 * or input.length() when none was found. Returns an empty string and
	 * leaves *pos alone when *pos is already at or past the end.
	 * A backslash as the very last character is a precondition
	 * violation (asserts; throws out_of_range when asserts are off). */
	static string escape_pull(const string& input, size_t* pos, char end_c) {
		assert(pos);
		string ret;
		// nothing to read: a blind at() here would throw on empty input
		if (*pos >= input.length()) return ret;
		if (input[*pos] == end_c) ++*pos;
		while (*pos < input.length()) {
			char c = input[*pos];
			if (c == end_c) break;
			if (c == '\\') {
				++*pos;
				assert(*pos < input.length());
				c = input.at(*pos);
				switch (c) {
				case '0': c = '\0'; break;
				case 'a': c = '\a'; break;
				case 'b': c = '\b'; break;
				case 'n': c = '\n'; break;
				case 'r': c = '\r'; break;
				case 't': c = '\t'; break;
				case 'v': c = '\v'; break;
				default: break;  // includes '\\': keep as-is
				}
			}
			ret += c;
			++*pos;
		}
		return ret;
	}

	/* Generic overload: views every string of the container data and
	 * hands the views to the vector<string_view> overload of join. */
	template <typename T>
	static string join(const T& data, const string& delimiter) {
		vector<string_view> as_views;
		for (const string& item : data) {
			as_views.push_back(item);
		}
		return join(as_views, delimiter);
	}

	/* Concatenates the elements of data, in set order, with delimiter
	 * between consecutive elements. An empty set yields "". */
	static string join(const set<string_view>& data,
			   const string& delimiter) {
		string joined;
		auto it = data.begin();
		if (it == data.end()) return joined;
		joined += *it;
		for (++it; it != data.end(); ++it) {
			joined += delimiter;
			joined += *it;
		}
		return joined;
	}

	/* Concatenates the views of data in order, with delimiter between
	 * consecutive elements. An empty vector yields "". */
	static string join(const vector<string_view>& data,
			   const string& delimiter) {
		string joined;
		for (size_t k = 0; k < data.size(); ++k) {
			if (k != 0) joined += delimiter;
			joined += data[k];
		}
		return joined;
	}

	/* Concatenates the strings of data in order, with delimiter between
	 * consecutive elements. An empty vector yields "". */
	static string join(const vector<string>& data,
			   const string& delimiter) {
		string joined;
		bool need_delimiter = false;
		for (const string& piece : data) {
			if (need_delimiter) joined += delimiter;
			joined += piece;
			need_delimiter = true;
		}
		return joined;
	}

	/* Concatenates data[i] for every index i in pos (ascending, 0-based),
	 * with delimiter between consecutive elements. An empty pos yields
	 * "". Throws out_of_range when pos names an index outside data,
	 * rather than reading past the end of the vector. */
	static string join(const vector<string_view>& data, const string& delimiter,
			   const set<size_t>& pos) {
		string joined;
		bool need_delimiter = false;
		for (const size_t idx : pos) {
			if (need_delimiter) joined += delimiter;
			joined += data.at(idx);
			need_delimiter = true;
		}
		return joined;
	}

	/* Partitions data by column number and joins each side with
	 * delimiter. Element i of data (0-based) goes to *in when pos
	 * contains i + 1, i.e. pos is 1-based, and to *out otherwise. Both
	 * outputs are always overwritten, and every element lands in exactly
	 * one of them even when pos holds numbers outside 1..data.size(). */
	static void join(const vector<string>& data, const string& delimiter,
			   const set<size_t>& pos, string* in, string* out) {
		assert(in);
		assert(out);
		string selected;
		string rest;
		// track "has content" separately: an empty first element must
		// still be followed by a delimiter
		bool selected_started = false;
		bool rest_started = false;

		for (size_t i = 0; i < data.size(); ++i) {
			const bool chosen = pos.count(i + 1) != 0;
			string& dst = chosen ? selected : rest;
			bool& started = chosen ? selected_started : rest_started;
			if (started) dst += delimiter;
			dst += data[i];
			started = true;
		}
		*in = selected;
		*out = rest;
	}

	/* Returns a copy of data in which the first occurrence of find is
	 * replaced by replacement. data is returned unchanged when find does
	 * not occur or is empty. */
	static string replace_first(const string_view& data, const string& find,
			      const string& replacement) {
		// an empty needle matches everywhere; treat it as "no match"
		if (find.empty()) return string(data);
		const size_t hit = data.find(find);
		if (hit == string_view::npos) return string(data);

		string result;
		result.reserve(data.length() + replacement.length());
		result.append(data.data(), hit);
		result += replacement;
		const size_t tail = hit + find.length();
		result.append(data.data() + tail, data.length() - tail);
		return result;
	}

	/* Returns a copy of data in which every non-overlapping occurrence
	 * of find (scanning left to right) is replaced by replacement. The
	 * inserted text is never rescanned. data is returned unchanged when
	 * find does not occur or is empty. */
	static string replace(const string_view& data, const string& find,
			      const string& replacement) {
		// an empty needle matches everywhere; treat it as "no match"
		if (find.empty()) return string(data);

		string result;
		result.reserve(data.length());
		size_t from = 0;
		for (size_t hit = data.find(find, from);
		     hit != string_view::npos;
		     hit = data.find(find, from)) {
			result.append(data.data() + from, hit - from);
			result += replacement;
			from = hit + find.length();
		}
		result.append(data.data() + from, data.length() - from);
		return result;
	}

	/* Returns a copy of data without any character that is a member of
	 * chars. */
	static string remove(const string_view& data,
			     const set<char>& chars) {
		string kept;
		kept.reserve(data.length());
		for (const char c : data) {
			if (chars.find(c) == chars.end()) kept.push_back(c);
		}
		return kept;
	}

	/* Extracts, via extract_one, the text that follows the last
	 * occurrence of the whole delimiter delim in in. Returns 1 when the
	 * delimiter was found and out was handed the text, 0 otherwise
	 * (including an empty delim). The text passed to extract_one is a
	 * temporary, so out should not be a string_view*. */
	template<typename T>
	static int last_token(const string& in, const string& delim, T out) {
		if (delim.empty()) return 0;
		// rfind matches delim as a unit; a character-set search would
		// land inside a multi-character delimiter and skip too far
		const size_t pos = in.rfind(delim);
		if (pos == string::npos) return 0;
		extract_one(in.substr(pos + delim.length()), out);
		return 1;
	}

	/* Appends data to *tokens unless it is empty. */
	static void add_token(const string_view& data, vector<string_view>* tokens) {
		if (data.empty()) return;
		tokens->push_back(data);
	}

	/* Returns data with every occurrence of delimitor removed: the input
	 * is split on it and the pieces are glued back together. */
	static string trimout(const string_view& data, const string& delimitor) {
		vector<string_view> kept;
		split(data, delimitor, &kept);

		string glued;
		glued.reserve(data.length());
		for (const string_view& part : kept) {
			glued += part;
		}
		return glued;
	}

	/* Returns the part of str that remains after stripping, from both
	 * ends, every character that appears in chars. The result is a view
	 * into str; it is empty when nothing else remains. */
	static string_view trim(const string_view& str, const string& chars) {
		const size_t first = str.find_first_not_of(chars);
		if (first == string_view::npos) return str.substr(0, 0);
		// a keeper exists, so the search from the back cannot fail
		const size_t last = str.find_last_not_of(chars);
		return str.substr(first, last - first + 1);
	}

	/* Returns str without leading and trailing whitespace (as decided by
	 * isspace). The result is a view into str; it is empty when str is
	 * empty or all whitespace. */
	static string_view trim(const string_view& str) {
		// isspace is undefined for negative values, so bytes are
		// classified as unsigned char
		const auto is_ws = [](char c) {
			return isspace(static_cast<unsigned char>(c)) != 0;
		};
		size_t s = 0;
		while (s < str.length() && is_ws(str[s])) ++s;
		if (s == str.length()) return str.substr(0, 0);
		// e is one past the last kept character; str[s] stops the scan
		size_t e = str.length();
		while (e > s && is_ws(str[e - 1])) --e;
		return str.substr(s, e - s);
	}

	/*
        static bool whitespace(const char& c) {
                return (c == ' ' || c == '\t' || c == '\r' || c == '\n');
        }*/

	/* Removes column col (1-based) from the delimiter-separated row data.
	 * On success *out receives the column and *in receives the row
	 * without it (remaining columns still separated by delimiter), and
	 * true is returned. Returns false, with both outputs set to "", when
	 * col is less than 1, when the row has fewer columns, or when the
	 * requested column would start at the very end of data. */
	static bool pop_split(const string& data,
			       char delimiter,
			       int col,
			       string* in,
			       string* out) {
		assert(out);
		assert(in);
		*in = "";

		// walk to the first character of the requested column
		size_t start = 0;
		for (int field = 1; field < col; ++field) {
			const size_t cut = data.find(delimiter, start);
			if (cut == string::npos) {
				*out = "";
				return false;
			}
			start = cut + 1;
		}
		if (col < 1 || start >= data.length()) {
			*out = "";
			return false;
		}

		// everything before the delimiter that precedes the column
		if (start) *in = data.substr(0, start - 1);

		const size_t end = data.find(delimiter, start);
		if (end == string::npos) {
			*out = data.substr(start);
			return true;
		}
		*out = data.substr(start, end - start);
		// Re-insert one delimiter whenever columns precede the removed
		// one; testing in->empty() instead would lose an empty leading
		// column (",b,c" minus column 2 must give ",c").
		if (start) *in += delimiter;
		*in += data.substr(end + 1);
		return true;
	}

	/* Copies column col (1-based) of the delimiter-separated row data
	 * into *out and returns true. Returns false, with *out set to "",
	 * when col is less than 1, when the row has fewer columns, or when
	 * the requested column would start at the very end of data. */
	static bool fast_split(const string& data,
			       char delimiter,
			       int col,
			       string* out) {
		assert(out);

		// hop over the col - 1 delimiters in front of the column
		size_t begin = 0;
		bool reachable = (col >= 1);
		for (int field = 1; reachable && field < col; ++field) {
			const size_t cut = data.find(delimiter, begin);
			if (cut == string::npos) {
				reachable = false;
			} else {
				begin = cut + 1;
			}
		}
		if (!reachable || begin >= data.length()) {
			*out = "";
			return false;
		}

		const size_t finish = data.find(delimiter, begin);
		if (finish == string::npos) {
			*out = data.substr(begin);
		} else {
			*out = data.substr(begin, finish - begin);
		}
		return true;
	}

	/* Parses nums with the two-argument numset and inserts every parsed
	 * number, offset by shift, into *out. */
	static void numset(const string& nums, set<size_t>* out, int shift) {
		set<size_t> parsed;
		numset(nums, &parsed);
		for (auto it = parsed.begin(); it != parsed.end(); ++it) {
			out->insert(*it + shift);
		}
	}

	/* Parses a comma-separated list of numbers and inserts each into
	 * *out. The single string "-" means "no numbers" and leaves *out
	 * untouched. A token that does not parse as a number contributes 0. */
	static void numset(const string& nums, set<size_t>* out) {
		if (nums == "-") return;
		assert(out);
		vector<string_view> cols;
		split(nums, ",", &cols);
		for (const string_view& col : cols) {
			// Pre-set: stream extraction leaves the target untouched
			// when the token is whitespace only, which would otherwise
			// insert an indeterminate value.
			size_t val = 0;

			extract_one(col, &val);
			out->insert(val);
		}
	}

	/* Splits data on deliminator with the vector overload of split and
	 * inserts the resulting pieces into the set *tokens. */
	static void split(const string_view& data, const string& deliminator,
		          set<string_view>* tokens) {
		vector<string_view> pieces;
		split(data, deliminator, &pieces);
		tokens->insert(pieces.begin(), pieces.end());
	}

	/* Runs split_mind_quote on data and appends an owning copy of every
	 * resulting piece to *tokens. */
	static void split_mind_quote_and_copy(const string_view& data,
					      const string& deliminator,
					      vector<string>* tokens) {
		vector<string_view> pieces;
		split_mind_quote(data, deliminator, &pieces);
		for (const string_view& piece : pieces) {
			tokens->emplace_back(piece);
		}
	}

	static void split_and_copy(const string& data,
				   const string& deliminator,
				   vector<string>* tokens) {
		return split_and_copy(string_view(data), deliminator, tokens);
	}

	/* Runs split on data and appends an owning copy of every resulting
	 * piece to *tokens. */
	static void split_and_copy(const string_view& data,
				   const string& deliminator,
				   vector<string>* tokens) {
		vector<string_view> pieces;
		split(data, deliminator, &pieces);
		for (const string_view& piece : pieces) {
			tokens->emplace_back(piece);
		}
	}

	/* Splits data on every occurrence of deliminator and hands each
	 * piece to add_token, which drops the empty ones. The pieces are
	 * views into data. deliminator must not be empty. */
	static void split(const string_view& data, const string& deliminator,
		          vector<string_view>* tokens) {
		assert(deliminator.length());
		const size_t step = deliminator.length();
		size_t from = 0;
		for (size_t hit = data.find(deliminator, from);
		     hit != string::npos;
		     hit = data.find(deliminator, from)) {
			add_token(data.substr(from, hit - from), tokens);
			from = hit + step;
			assert(from <= data.length());
		}
		// whatever follows the last delimiter
		add_token(data.substr(from), tokens);
	}

	/* Splits data on every occurrence of deliminator and appends each
	 * piece, empty ones included, to *tokens; n delimiters always yield
	 * n + 1 pieces. The pieces are views into data. deliminator must
	 * not be empty. */
	static void split_with_empty(const string_view& data, const string& deliminator,
		                     vector<string_view>* tokens) {
		assert(deliminator.length());
		const size_t step = deliminator.length();
		size_t from = 0;
		size_t hit;
		while ((hit = data.find(deliminator, from)) != string::npos) {
			tokens->push_back(data.substr(from, hit - from));
			from = hit + step;
		}
		// whatever follows the last delimiter (possibly nothing)
		tokens->push_back(data.substr(from));
	}

	/* Like split, but an occurrence of deliminator only separates pieces
	 * when annotate_quote marks its first character as unquoted ('_').
	 * Empty pieces are dropped by add_token; the pieces are views into
	 * data. deliminator must not be empty. */
	static void split_mind_quote(const string_view& data, const string& deliminator,
				     vector<string_view>* tokens) {
		// same precondition as split and split_with_empty
		assert(deliminator.length());
		if (deliminator.empty()) {
			// asserts disabled: an empty needle would never advance,
			// so fall back to "nothing to split on"
			add_token(data, tokens);
			return;
		}
		string quote;
		annotate_quote(data, &quote);
		const size_t step = deliminator.length();
		size_t start = 0;
		size_t pos = 0;
		while ((pos = data.find(deliminator, pos)) != string::npos) {
			if (quote.at(pos) == '_') {
				add_token(data.substr(start, pos - start), tokens);
				start = pos + step;
			}
			pos += step;
		}
		add_token(data.substr(start), tokens);
	}

	/* Appends to *results every outermost left...right region of data,
	 * delimiters included; nested regions stay inside their enclosing
	 * one. Scanning stops at the first opener that is never closed.
	 * Returns the resulting size of *results. left must be non-empty and
	 * differ from right. */
	static size_t extract_outer_paired(const string& left,
					   const string& right,
				  	   const string_view& data,
					   vector<string_view>* results) {
		assert(results);
		assert(left != right);
		assert(!left.empty());
		// true when `what` occurs in data exactly at offset `at`
		const auto at_here = [&data](size_t at, const string& what) {
			return data.substr(at, what.length()) == string_view(what);
		};
		size_t curpos = 0;

		while (curpos < data.size()) {
			curpos = data.find(left, curpos);
			if (curpos == string::npos) return results->size();

			const size_t start = curpos;
			// Step over the whole opener: moving a single character
			// would let a multi-character opener match again on its
			// own tail and inflate the depth.
			curpos += left.length();
			int depth = 1;
			while (curpos < data.length() && depth) {
				if (at_here(curpos, right)) {
					--depth;
					curpos += right.length();
				} else if (at_here(curpos, left)) {
					++depth;
					curpos += left.length();
				} else {
					++curpos;
				}
			}
			if (depth) return results->size();
			results->push_back(data.substr(start, curpos - start));
		}
		return results->size();
	}

	/* Returns true when search occurs in main exactly at offset pos.
	 * Offsets past the end of main never match. Comparison is by length,
	 * so embedded NUL characters are compared like any other. */
	static bool substreq(const string& main, size_t pos,
			     const string& search) {
		// compare() would throw for pos > length; report "no match"
		if (pos > main.length()) return false;
		return main.compare(pos, search.length(), search) == 0;
	}

	/* string_view overload: true when search occurs in main exactly at
	 * offset pos. Views compare in place, so no copy is made. Throws
	 * out_of_range when pos is greater than main.length(). */
	static bool substreq(const string_view& main, size_t pos,
			     const string_view& search) {
		return main.compare(pos, search.length(), search) == 0;
	}

	/* Appends to *results the content (delimiters excluded) of every
	 * balanced left...right region of data, nested regions included, in
	 * order of their opening delimiter. An opener that is never closed
	 * contributes nothing. Returns the resulting size of *results.
	 * left must be non-empty and differ from right. */
	static size_t extract_all_paired(const string& left,
					 const string& right,
				  	 const string_view& data,
					 vector<string_view>* results) {
		assert(results);
		assert(left != right);
		assert(!left.empty());
		// asserts disabled: an empty opener could never advance the scan
		if (left.empty()) return results->size();
		// true when `what` occurs in data exactly at offset `at`
		const auto at_here = [&data](size_t at, const string& what) {
			return data.substr(at, what.length()) == string_view(what);
		};
		for (size_t i = 0; i < data.length(); ++i) {
			if (!at_here(i, left)) continue;
			const size_t body = i + left.length();
			int depth = 1;
			size_t j = body;
			while (j < data.length()) {
				if (at_here(j, left)) {
					++depth;
					j += left.length();
				} else if (at_here(j, right)) {
					if (--depth == 0) {
						results->push_back(
							data.substr(body, j - body));
						break;
					}
					j += right.length();
				} else {
					++j;
				}
			}
			// Resume right behind the opener. Skipping one more
			// character would miss an opener that directly follows,
			// as in "{{a}}".
			i = body - 1;
		}
		return results->size();
	}

	/* Finds every match of the regexp re in data and, for each match,
	   appends sub-match number pos to *queries: 0 is the full match,
	   1 the first parenthesised sub-expression, and so on. Returns the
	   resulting size of *queries.
	 */
	static size_t re_extract_all(const regex& re,
				     const string& data,
				     size_t pos,
				     vector<string>* queries) {
		assert(queries);
		const sregex_iterator done;
		for (sregex_iterator cur(data.begin(), data.end(), re);
		     cur != done; ++cur) {
			const smatch& match = *cur;
			assert(pos < match.size());
			queries->push_back(match.str(pos));
		}
		return queries->size();
	}

	/* Repeatedly applies extract with the pattern "%" + format + "%" to
	 * data: each round ignores the first field, appends the second to
	 * *results and continues on the third. Stops as soon as extract
	 * reports fewer than two fields. Returns the resulting size of
	 * *results. format must be non-empty and must not end in '%'. */
	static size_t extract_all(const string_view& format,
				  const string_view& data,
				  vector<string_view>* results) {
		assert(results);
		assert(format.length());
		assert(format.at(format.length() - 1) != '%');
		// the wrapped pattern is the same for every round
		const string wrapped = "%" + string(format) + "%";
		string_view remaining = data;
		for (;;) {
			string_view after, hit;
			const int got = extract(wrapped, remaining, nullptr,
						&hit, &after);
			if (got < 2) break;
			results->push_back(hit);
			remaining = after;
		}
		return results->size();
	}

	/* Breaks an extract format string into the list of literal pieces
	 * that sit between its '%' separators. The format is split on '%'
	 * with empty pieces kept; an empty piece therefore means the format
	 * starts with '%', ends with '%', or has two '%' in a row. A run of
	 * two '%' is folded back into a single literal '%' inside the
	 * surrounding token. Throws logic_error if an empty piece is met at
	 * a non-zero index while no token has been produced yet.
	 * NOTE(review): that throw looks unreachable, since index 0 always
	 * produces a token -- confirm before relying on it. */
	static list<string> tokenize_for_extract(const string& format) {
		list<string> tokens;
		vector<string_view> tokens_with_empty;
		split_with_empty(format, "%", &tokens_with_empty);
		for (size_t i = 0; i < tokens_with_empty.size(); ++i) {
			if (tokens_with_empty[i].empty()) {
				if (tokens.empty()) {
					if (i) throw logic_error("parsing" + format);
					// format starts with '%': record an empty
					// leading literal
					tokens.push_back("");
					// a single leading '%' is done here; with a
					// second '%' right behind it (or a format of
					// length 1) control falls through to the
					// handling below
					if (format.length() > 1 &&
					    format[1] != '%')
						continue; // initial
				}
				// format ends with '%': record an empty trailing
				// literal
				if (i + 1 == tokens_with_empty.size()) {
					tokens.push_back("");
					continue;
				}
				// "%%" in the middle: glue the previous token, a
				// literal '%' and the next piece into one token;
				// ++i consumes that next piece
				string token = tokens.back();
				token += "%" + string(tokens_with_empty[++i]);
				tokens.pop_back();
				tokens.push_back(token);

			} else {
				// ordinary literal piece
				tokens.push_back(string(tokens_with_empty[i]));
			}
		}
		return tokens;
	}

	/* Entry point of the position-reporting extractor: tokenizes format
	 * and runs extract_with_pos_impl over data with the given output
	 * arguments. Returns what the implementation returns, or 0 when
	 * either input is empty or when an int or a logic_error is thrown
	 * along the way. */
	template<typename... ARGS>
	static size_t extract_with_pos(
			const string& format, const string_view& data, ARGS... args) {
		if (data.empty() || format.empty()) return 0;

		const size_t format_start = 0;
		const size_t data_start = 0;
		try {
			list<string> tokens = tokenize_for_extract(format);
			return Tokenizer::extract_with_pos_impl(
				format_start, format, data_start, data,
				&tokens, 0, args...);
		} catch (int) {
			return 0;
		} catch (const logic_error&) {
			return 0;
		}
	}

	/* Recursive step of extract_with_pos. Each call consumes the literal
	 * at the front of *tokens and the field that follows it: the field
	 * text is handed to extract_one together with car, and the offset in
	 * data at which the field starts is written to *pos. It then
	 * recurses on the remaining outputs with cnt + 1. Returns cnt early
	 * when data is exhausted or the literal does not match. Throws
	 * logic_error("parse error") when no token is left after popping
	 * the literal. */
	template<typename T, typename... ARGS>
	static size_t extract_with_pos_impl(size_t pos_format, const string& format,
				   size_t pos_data, const string_view& data,
				   list<string>* tokens, int cnt,
				   T car, size_t* pos, ARGS... cdr) {
		// data used up: report the fields extracted so far
		if (pos_data == data.length()) return cnt;
		// literal expected in front of the next field
		string token = tokens->front();
		tokens->pop_front();
		if (!tokens->size()) {
			throw logic_error("parse error");
		}
		// compare the literal-sized slices of data and format
		if (!(check_token_match(
			data.substr(pos_data, token.length()),
			format.substr(pos_format, token.length())))) {
			// this only happens if the first fixed part of the
			// string is a non-matching component
			assert(!cnt);
			return cnt;
		}
		// step over the literal in both strings
		pos_format += token.length();
		pos_data += token.length();

		// the field ends where the next literal begins
		size_t nextpos = token_match(data, tokens->front(), pos_data);

		// no next literal found, or it is empty: the field runs to the
		// end of data
		if (nextpos == string::npos || tokens->front().empty())
			nextpos = data.length();
		Tokenizer::extract_one(
			data.substr(pos_data, nextpos - pos_data), car);
		// report where this field started
		*pos = pos_data;
		pos_data = nextpos;
		// presumably steps over the '%' that stands for the field in
		// format -- confirm against tokenize_for_extract
		pos_format += 1;

		return Tokenizer::extract_with_pos_impl(
			pos_format, format, pos_data, data,
			tokens, cnt + 1, cdr...);
	}

	// Terminal overload, reached once every output argument has been
	// filled: exactly one token must remain, and both the unread rest of
	// data (if any) and the unread rest of format must equal it.
	// Returns cnt; throws logic_error otherwise.
	static size_t extract_with_pos_impl(
			size_t pos_format, const string& format,
			size_t pos_data, const string_view& data,
			list<string>* tokens, int cnt) {
		if (tokens->size() != 1)
			throw logic_error("in base case, not one token remaining");

		const string& tail = tokens->front();
		const bool data_left = pos_data < data.length();
		if (data_left && data.substr(pos_data) != tail)
			throw logic_error("extract failed");
		if (format.substr(pos_format) != tail)
			throw logic_error("implementation failure");
		return cnt;
	}

	/* Entry point of the extractor: tokenizes format and runs
	 * extract_impl over data with the given output arguments. Returns
	 * what the implementation returns, or 0 when either input is empty
	 * or when an int or a logic_error is thrown along the way. */
	template<typename... ARGS>
	static size_t extract(const string& format,
			      const string_view& data,
			      ARGS... args) {
		if (data.empty() || format.empty()) return 0;

		const size_t format_start = 0;
		const size_t data_start = 0;
		try {
			list<string> tokens = tokenize_for_extract(format);
			return Tokenizer::extract_impl(
				format_start, format, data_start, data,
				&tokens, 0, args...);
		} catch (int) {
			return 0;
		} catch (const logic_error&) {
			return 0;
		}
	}

	/* Returns the first offset at or after pos at which token occurs in
	 * data, or string::npos. The character '\4' in token stands for "any
	 * one whitespace character" and is only supported as the first
	 * character of token; anywhere else it trips an assert. */
	static size_t token_match(const string_view& data,
				  const string_view& token,
				  int pos) {
		const size_t marker = token.find_last_of("\4");
		// no placeholder: plain substring search
		if (marker == string::npos) return data.find(token, pos);
		if (marker != 0) {
			assert(0);
			return 0;
		}
		// try each concrete whitespace character in place of the
		// placeholder and keep the earliest hit
		static const char blanks[] = {' ', '\f', '\v', '\n', '\r', '\t'};
		string probe(token);
		size_t earliest = string::npos;
		for (const char blank : blanks) {
			probe[0] = blank;
			const size_t hit = data.find(probe, pos);
			if (hit < earliest) earliest = hit;
		}
		return earliest;
	}

	/* Returns true when data equals token, where the first characters
	 * are also considered equal if both are whitespace or the '\4'
	 * whitespace placeholder. Everything after the first character must
	 * match exactly. An empty token only matches empty data, and empty
	 * data only matches an empty token. */
	static bool check_token_match(const string_view& data,
				      const string_view& token) {
		if (token.empty()) return data.empty();
		// guard before substr(1), which throws on an empty view
		if (data.empty()) return false;
		if (data.substr(1) != token.substr(1)) return false;
		if (data[0] == token[0]) return true;
		const auto blank = [](char c) {
			switch (c) {
			case ' ': case '\f': case '\v':
			case '\n': case '\r': case '\t': case '\4':
				return true;
			default:
				return false;
			}
		};
		return blank(data[0]) && blank(token[0]);
	}

	/* Recursive step of extract. Each call consumes the literal at the
	 * front of *tokens and the field that follows it; the field text is
	 * handed to extract_one together with car. It then recurses on the
	 * remaining outputs with cnt + 1. Returns cnt early when data is
	 * exhausted or the literal does not match. Throws
	 * logic_error("parse error") when no token is left after popping
	 * the literal. */
	template<typename T, typename... ARGS>
	static size_t extract_impl(size_t pos_format, const string& format,
				   size_t pos_data, const string_view& data,
				   list<string>* tokens, int cnt,
				   T car, ARGS... cdr) {
		// data used up: report the fields extracted so far
		if (pos_data == data.length()) return cnt;
		// Only the literal's length is needed once it is popped; keep
		// that by value rather than a view into the destroyed node.
		const size_t literal_len = tokens->front().length();
		tokens->pop_front();
		if (tokens->empty()) {
			throw logic_error("parse error");
		}
		// compare the literal-sized slices of data and format
		if (!(check_token_match(
			data.substr(pos_data, literal_len),
			format.substr(pos_format, literal_len)))) {
			// this only happens if the first fixed part of the
			// string is a non-matching component
			assert(!cnt);
			return cnt;
		}
		// step over the literal in both strings
		pos_format += literal_len;
		pos_data += literal_len;

		// the field ends where the next literal begins
		size_t nextpos = token_match(data, tokens->front(), pos_data);

		// no next literal found, or it is empty: the field runs to the
		// end of data
		if (nextpos == string::npos || tokens->front().empty())
			nextpos = data.length();

		Tokenizer::extract_one(data.substr(pos_data, nextpos -
						   pos_data),
				       car);
		pos_data = nextpos;
		// presumably steps over the '%' that stands for the field in
		// format -- confirm against tokenize_for_extract
		pos_format += 1;

		return Tokenizer::extract_impl(
			pos_format, format, pos_data, data,
			tokens, cnt + 1, cdr...);
	}

	/* std::string overload: the text is copied verbatim, so whitespace
	 * is preserved. A null out is ignored. */
	static void extract_one(const string_view& in, string* out) {
		if (out == nullptr) return;
		out->assign(in.data(), in.size());
	}

	/* string_view overload: *out is pointed at the same characters as
	 * in, nothing is copied. A null out is ignored. */
	static void extract_one(const string_view& in, string_view* out) {
		if (out != nullptr) {
			*out = in;
		}
	}

	/* bool overload: *out becomes true when in is exactly "true", "1",
	 * "on" or "yes", and false for anything else. A null out is ignored,
	 * like in the other extract_one overloads. */
	static void extract_one(const string_view& in, bool* out) {
		if (!out) return;
		*out = (in == "true" || in == "1" ||
			in == "on" || in == "yes");
	}

	/* nullptr overload: the caller passed a literal nullptr for this
	 * field, so the text is simply dropped. */
	static void extract_one([[maybe_unused]] const string_view& in,
				[[maybe_unused]] nullptr_t out) {
	}

	/* Generic overload: parses in with stream extraction (operator>>)
	 * into *out. out is expected to be a pointer; a null out is
	 * ignored. */
	template<typename T>
	static void extract_one(const string_view& in, T out) {
		if (!out) return;
		stringstream reader;
		reader << in;
		reader >> *out;
	}

	// Terminal overload, reached once every output argument has been
	// filled: exactly one token must remain, and both the unread rest of
	// data (if any) and the unread rest of format must equal it.
	// Returns cnt; throws logic_error otherwise.
	static size_t extract_impl(size_t pos_format, const string& format,
				   size_t pos_data, const string_view& data,
				   list<string>* tokens, int cnt) {
		const bool single_token = (tokens->size() == 1);
		if (!single_token)
			throw logic_error("in base case, not one token remaining");

		const string& last = tokens->front();
		if (pos_data < data.length()) {
			const string_view unread = data.substr(pos_data);
			if (unread != last)
				throw logic_error("extract failed");
		}
		if (format.substr(pos_format) != last)
			throw logic_error("implementation failure");
		return cnt;
	}

	/* Splits data on delimiter and keeps, via get_lines_matching_impl,
	 * the pieces selected by the filter arguments args. Returns what the
	 * implementation returns. */
	template<typename... Args>
	static int get_split_matching(vector<string_view>* out,
				      const string& delimiter,
				      const string_view& data,
				      Args... args) {
		vector<string_view> pieces;
		split(data, delimiter, &pieces);
		const int kept = get_lines_matching_impl(out, pieces, args...);
		return kept;
	}

	/* Splits data into lines on "\n" and keeps, via
	 * get_lines_matching_impl, the lines selected by the filter
	 * arguments args. Returns what the implementation returns. */
	template<typename... Args>
	static int get_lines_matching(vector<string_view>* out,
				      const string_view& data,
				      Args... args) {
		vector<string_view> lines;
		split(data, "\n", &lines);
		const int kept = get_lines_matching_impl(out, lines, args...);
		return kept;
	}

	/* Recursive filter step: first narrows lines down with the remaining
	 * needles cdr, then appends to *out every survivor that also
	 * contains car. Returns the resulting size of *out. */
	template<typename... Args>
	static int get_lines_matching_impl(vector<string_view>* out,
					   const vector<string_view>& lines,
					   const string& car,
					   Args... cdr) {
		vector<string_view> survivors;
		get_lines_matching_impl(&survivors, lines, cdr...);
		for (const string_view& line : survivors) {
			if (line.find(car) == string::npos) continue;
			out->push_back(line);
		}
		return out->size();
	}

	/* Terminal filter step (no needles left): every line passes, so *out
	 * becomes a copy of lines. Returns the size of *out. */
	static int get_lines_matching_impl(vector<string_view>* out,
					   const vector<string_view>& lines) {
		assert(out);
		vector<string_view>& sink = *out;
		sink = lines;
		return sink.size();
	}

	/* Returns value with spacer inserted after each group of characters
	 * whose lengths are given, in order, by args; the work is done by
	 * space_impl. */
	template<typename... Args>
	static string space(const string& value,
			    const string& spacer,
			    Args... args) {
		stringstream buffer;
		space_impl(value, spacer, &buffer, args...);
		return buffer.str();
	}

	/* Recursive step of space: writes the first car characters of value
	 * to *ss and, if anything is left, writes spacer and recurses on the
	 * rest with the remaining group lengths cdr. car must not exceed
	 * value.length(). */
	template<typename... Args>
	static void space_impl(const string& value,
			       const string& spacer,
			       stringstream* ss,
			       size_t car,
			       Args... cdr) {
		assert(ss);
		assert(car <= value.length());
		const string head = value.substr(0, car);
		*ss << head;
		// value fully written: no trailing spacer
		if (car >= value.length()) return;
		*ss << spacer;
		const string tail = value.substr(car);
		space_impl(tail, spacer, ss, cdr...);
	}

	/* Terminal step of space (no group lengths left): writes the whole
	 * of value to *ss without any spacer. */
	static void space_impl(const string& value,
			       [[maybe_unused]] const string& spacer,
			       stringstream* ss) {
		stringstream& sink = *ss;
		sink << value;
	}

	/* Reads hex as a sequence of fixed-width hexadecimal numbers of
	 * digits characters each (2, 4 or 8) and returns them as a ", "
	 * separated list of decimal numbers. Each group is interpreted as a
	 * two's-complement signed value of that width, e.g. "FF" with
	 * digits 2 is -1 and "80" is -128. Returns "" when digits is not 2,
	 * 4 or 8 or when the length of hex is not a multiple of digits.
	 * Exceptions thrown by stoul for non-hex input propagate. */
	static string make_list(const string& hex, size_t digits) {
		if (!(digits == 2 || digits == 4 || digits == 8)) return "";
		if (hex.length() % digits) return "";

		// 64-bit arithmetic: for 8 digits the sign bit is 2^31 and the
		// modulus 2^32, neither of which fits an int shift
		const int64_t sign_bit = int64_t{1} << (digits * 4 - 1);
		const int64_t modulus = sign_bit << 1;

		stringstream ssout;
		for (size_t i = 0; i < hex.length(); i += digits) {
			int64_t val = static_cast<int64_t>(
				stoul(hex.substr(i, digits), nullptr, 16));
			// values with the top bit set are negative (>=, so that
			// the most negative value is included)
			if (val >= sign_bit) val -= modulus;
			if (i) ssout << ", ";
			ssout << val;
		}
		return ssout.str();
        }

	/* Returns true if value matches a wildcard format using * and ?,
	   where * matches 0 or more characters and ? matches exactly one.
	   Every other character of format must match literally. The whole
	   of value has to be covered by format.
	 */
	static bool wildcard_match(const string& format, const string& value) {
		const size_t none = string::npos;
		size_t f = 0;		// next unmatched position in format
		size_t v = 0;		// next unmatched position in value
		size_t star = none;	// position of the most recent '*'
		size_t resume = 0;	// value position that '*' currently covers up to

		while (v < value.length()) {
			if (f < format.length() && format[f] == '*') {
				// remember the star and first try it as empty
				star = f++;
				resume = v;
			} else if (f < format.length() &&
				   (format[f] == '?' || format[f] == value[v])) {
				++f;
				++v;
			} else if (star != none) {
				// mismatch: let the last star swallow one more
				// character and retry right behind it
				f = star + 1;
				v = ++resume;
			} else {
				return false;
			}
		}
		// value is used up; only stars may remain in format
		while (f < format.length() && format[f] == '*') ++f;
		return f == format.length();
	}
};

#endif  // __TOKENIZER__H__
