using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;

namespace ProtoBuf.Reflection
{
    // Extension helpers for walking the token stream produced by Tokenize, plus the
    // low-level parsing/normalization helpers used by the .proto parser.
    internal static class TokenExtensions
    {
        public static bool Is(this Peekable<Token> tokens, TokenType type, string value = null)
        {
            Token val;
            return tokens.Peek(out val) && val.Is(type, value);
        }

        public static void Consume(this Peekable<Token> tokens, TokenType type, string value)
        {
            var token = tokens.Read();
            token.Assert(type, value);
            tokens.Consume();
        }

        public static bool ConsumeIf(this Peekable<Token> tokens, TokenType type, string value)
        {
            Token token;
            if (tokens.Peek(out token) && token.Is(type, value))
            {
                tokens.Consume();
                return true;
            }
            return false;
        }

        public static Token Read(this Peekable<Token> tokens)
        {
            Token val;
            if (!tokens.Peek(out val))
            {
                throw new ParserException(tokens.Previous, "Unexpected end of file", true);
            }
            return val;
        }

        public static bool SkipToEndOptions(this Peekable<Token> tokens)
        {
            Token token;
            while (tokens.Peek(out token))
            {
                if (token.Is(TokenType.Symbol, ";") || token.Is(TokenType.Symbol, "}"))
                    return true; // but don't consume

                tokens.Consume();
                if (token.Is(TokenType.Symbol, "]"))
                    return true;
            }
            return false;
        }

        public static bool SkipToEndStatement(this Peekable<Token> tokens)
        {
            Token token;
            while (tokens.Peek(out token))
            {
                if (token.Is(TokenType.Symbol, "}"))
                    return true; // but don't consume

                tokens.Consume();
                if (token.Is(TokenType.Symbol, ";"))
                    return true;
            }
            return false;
        }

        public static bool SkipToEndObject(this Peekable<Token> tokens) => SkipToSymbol(tokens, "}");

        private static bool SkipToSymbol(this Peekable<Token> tokens, string symbol)
        {
            Token token;
            while (tokens.Peek(out token))
            {
                tokens.Consume();
                if (token.Is(TokenType.Symbol, symbol))
                    return true;
            }
            return false;
        }

        public static bool SkipToEndStatementOrObject(this Peekable<Token> tokens)
        {
            Token token;
            while (tokens.Peek(out token))
            {
                tokens.Consume();
                if (token.Is(TokenType.Symbol, "}") || token.Is(TokenType.Symbol, ";"))
                    return true;
            }
            return false;
        }

        public static string Consume(this Peekable<Token> tokens, TokenType type)
        {
            var token = tokens.Read();
            token.Assert(type);
            string s = token.Value;
            tokens.Consume();
            return s;
        }

        // Caches the mapping from names (including [ProtoEnum(Name = ...)] overrides) to enum values.
        static class EnumCache<T>
        {
            private static readonly Dictionary<string, T> lookup;

            public static bool TryGet(string name, out T value) => lookup.TryGetValue(name, out value);

            static EnumCache()
            {
                var fields = typeof(T).GetFields(BindingFlags.Static | BindingFlags.Public);
                var tmp = new Dictionary<string, T>(StringComparer.OrdinalIgnoreCase);
                foreach (var field in fields)
                {
                    string name = field.Name;
                    var attrib = field.GetCustomAttributes(false).OfType<ProtoEnumAttribute>().FirstOrDefault();
                    if (!string.IsNullOrWhiteSpace(attrib?.Name)) name = attrib.Name;
                    var val = (T)field.GetValue(null);
                    tmp.Add(name, val);
                }
                lookup = tmp;
            }
        }

        internal static T ConsumeEnum<T>(this Peekable<Token> tokens, bool ignoreCase = true) where T : struct
        {
            var token = tokens.Read();
            var value = tokens.ConsumeString(); // advances past the token (string literal or alphanumeric)

            T val;
            if (!EnumCache<T>.TryGet(token.Value, out val))
                throw token.Throw("Unable to parse " + typeof(T).Name);
            return val;
        }
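        // Usage sketch for the numeric helpers below (illustrative values, not taken from the
        // parser's call sites): they accept plain decimal, "0x"-prefixed hex, and - when the
        // caller supplies a max - the literal token "max", as used by proto range syntax such
        // as "extensions 100 to max;".
        //
        //   TokenExtensions.TryParseUInt32("42", out var a);              // a == 42
        //   TokenExtensions.TryParseUInt32("0x2A", out var b);            // b == 42
        //   TokenExtensions.TryParseUInt32("max", out var c, 536870911);  // c == 536870911 (the supplied max)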
        internal static bool TryParseUInt32(string token, out uint val, uint? max = null)
        {
            if (max.HasValue && token == "max")
            {
                val = max.GetValueOrDefault();
                return true;
            }
            if (token.StartsWith("0x", StringComparison.OrdinalIgnoreCase) &&
                uint.TryParse(token.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out val))
            {
                return true;
            }
            return uint.TryParse(token, NumberStyles.Integer | NumberStyles.AllowExponent, CultureInfo.InvariantCulture, out val);
        }

        internal static bool TryParseUInt64(string token, out ulong val, ulong? max = null)
        {
            if (max.HasValue && token == "max")
            {
                val = max.GetValueOrDefault();
                return true;
            }
            if (token.StartsWith("0x", StringComparison.OrdinalIgnoreCase) &&
                ulong.TryParse(token.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out val))
            {
                return true;
            }
            return ulong.TryParse(token, NumberStyles.Integer | NumberStyles.AllowExponent, CultureInfo.InvariantCulture, out val);
        }

        internal static bool TryParseInt32(string token, out int val, int? max = null)
        {
            if (max.HasValue && token == "max")
            {
                val = max.GetValueOrDefault();
                return true;
            }
            if (token.StartsWith("-0x", StringComparison.OrdinalIgnoreCase) &&
                int.TryParse(token.Substring(3), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out val))
            {
                val = -val;
                return true;
            }
            if (token.StartsWith("0x", StringComparison.OrdinalIgnoreCase) &&
                int.TryParse(token.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out val))
            {
                return true;
            }
            return int.TryParse(token, NumberStyles.Integer | NumberStyles.AllowLeadingSign | NumberStyles.AllowExponent, CultureInfo.InvariantCulture, out val);
        }

        internal static bool TryParseInt64(string token, out long val, long? max = null)
        {
            if (max.HasValue && token == "max")
            {
                val = max.GetValueOrDefault();
                return true;
            }
            if (token.StartsWith("-0x", StringComparison.OrdinalIgnoreCase) &&
                long.TryParse(token.Substring(3), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out val))
            {
                val = -val;
                return true;
            }
            if (token.StartsWith("0x", StringComparison.OrdinalIgnoreCase) &&
                long.TryParse(token.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out val))
            {
                return true;
            }
            return long.TryParse(token, NumberStyles.Integer | NumberStyles.AllowLeadingSign | NumberStyles.AllowExponent, CultureInfo.InvariantCulture, out val);
        }

        internal static int ConsumeInt32(this Peekable<Token> tokens, int? max = null)
        {
            var token = tokens.Read();
            token.Assert(TokenType.AlphaNumeric);
            tokens.Consume();

            int val;
            if (TryParseInt32(token.Value, out val, max)) return val;
            throw token.Throw("Unable to parse integer");
        }
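        // Sketch of what ConsumeString (below) produces when asBytes is false (examples are
        // assumptions derived from the normalization rules, not from a test suite): adjacent
        // string literals are concatenated, escapes are decoded to raw bytes by ReadStringBytes,
        // and the result is re-escaped into the normalized form that codegen consumes, e.g.
        //
        //   "ab" "\x41"     =>  abA            (literal concatenation; \x41 decodes to 'A')
        //   "line1\nline2"  =>  line1\nline2   (control characters come back out as escapes)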
        internal static string ConsumeString(this Peekable<Token> tokens, bool asBytes = false)
        {
            var token = tokens.Read();
            switch (token.Type)
            {
                case TokenType.StringLiteral:
                    MemoryStream ms = null;
                    do
                    {
                        ReadStringBytes(ref ms, token.Value);
                        tokens.Consume();
                    } while (tokens.Peek(out token) && token.Type == TokenType.StringLiteral); // literal concat is a thing
                    if (ms == null) return "";

                    if (!asBytes)
                    {
#if NETSTANDARD1_3
                        string s = ms.TryGetBuffer(out var segment)
                            ? Encoding.UTF8.GetString(segment.Array, segment.Offset, segment.Count)
                            : Encoding.UTF8.GetString(ms.ToArray());
#else
                        string s = Encoding.UTF8.GetString(ms.GetBuffer(), 0, (int)ms.Length);
#endif
                        return s.Replace("\\", @"\\")
                            .Replace("\'", @"\'")
                            .Replace("\"", @"\""")
                            .Replace("\r", @"\r")
                            .Replace("\n", @"\n")
                            .Replace("\t", @"\t");
                    }

                    var sb = new StringBuilder((int)ms.Length);
                    int b;
                    ms.Position = 0;
                    while ((b = ms.ReadByte()) >= 0)
                    {
                        switch (b)
                        {
                            case '\n': sb.Append(@"\n"); break;
                            case '\r': sb.Append(@"\r"); break;
                            case '\t': sb.Append(@"\t"); break;
                            case '\'': sb.Append(@"\'"); break;
                            case '\"': sb.Append(@"\"""); break;
                            case '\\': sb.Append(@"\\"); break;
                            default:
                                if (b >= 32 && b < 127)
                                {
                                    sb.Append((char)b);
                                }
                                else
                                {
                                    // encode as 3-part octal
                                    sb.Append('\\')
                                        .Append((char)(((b >> 6) & 7) + (int)'0'))
                                        .Append((char)(((b >> 3) & 7) + (int)'0'))
                                        .Append((char)(((b >> 0) & 7) + (int)'0'));
                                }
                                break;
                        }
                    }
                    return sb.ToString();
                case TokenType.AlphaNumeric:
                    tokens.Consume();
                    return token.Value;
                default:
                    throw token.Throw();
            }
        }

        internal static void AppendAscii(MemoryStream target, string ascii)
        {
            foreach (char c in ascii)
                target.WriteByte(checked((byte)c));
        }

        internal static void AppendByte(MemoryStream target, ref uint codePoint, ref int len)
        {
            if (len != 0)
            {
                target.WriteByte(checked((byte)codePoint));
            }
            codePoint = 0;
            len = 0;
        }

        internal static unsafe void AppendNormalized(MemoryStream target, ref uint codePoint, ref int len)
        {
            if (len == 0)
            {
                codePoint = 0;
                return;
            }
            byte* b = stackalloc byte[10];
            char c = checked((char)codePoint);
            int count = Encoding.UTF8.GetBytes(&c, 1, b, 10);
            for (int i = 0; i < count; i++)
            {
                target.WriteByte(b[i]);
            }
        }

        internal static void AppendEscaped(MemoryStream target, char c)
        {
            uint codePoint;
            switch (c)
            {
                // encoded as octal
                case 'a': codePoint = '\a'; break;
                case 'b': codePoint = '\b'; break;
                case 'f': codePoint = '\f'; break;
                case 'v': codePoint = '\v'; break;
                case 't': codePoint = '\t'; break;
                case 'n': codePoint = '\n'; break;
                case 'r': codePoint = '\r'; break;

                case '\\':
                case '?':
                case '\'':
                case '\"':
                    codePoint = c;
                    break;
                default:
                    codePoint = '?';
                    break;
            }
            int len = 1;
            AppendNormalized(target, ref codePoint, ref len);
        }

        internal static bool GetHexValue(char c, out uint val, ref int len)
        {
            len++;
            if (c >= '0' && c <= '9')
            {
                val = (uint)c - (uint)'0';
                return true;
            }
            if (c >= 'a' && c <= 'f')
            {
                val = 10 + (uint)c - (uint)'a';
                return true;
            }
            if (c >= 'A' && c <= 'F')
            {
                val = 10 + (uint)c - (uint)'A';
                return true;
            }
            len--;
            val = 0;
            return false;
        }
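        // Sketch of the escape state machine in ReadStringBytes (below), using an assumed input
        // chosen purely for illustration: for the literal body a\x41\101, 'a' is appended as-is
        // (STATE_NORMAL), "\x41" runs through STATE_HEX and emits byte 0x41, and "\101" runs
        // through STATE_OCTAL and - after three octal digits - also emits byte 0x41, so the
        // decoded buffer contains the ASCII bytes of "aAA".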
        // the normalized output *includes* the slashes, but expands octal to 3 places;
        // it is the job of codegen to change this normalized form to the target language form
        internal static void ReadStringBytes(ref MemoryStream ms, string value)
        {
            const int STATE_NORMAL = 0, STATE_ESCAPE = 1, STATE_OCTAL = 2, STATE_HEX = 3;
            int state = STATE_NORMAL;
            if (value == null || value.Length == 0) return;
            if (ms == null) ms = new MemoryStream(value.Length);

            uint escapedCodePoint = 0;
            int escapeLength = 0;
            foreach (char c in value)
            {
                switch (state)
                {
                    case STATE_ESCAPE:
                        if (c >= '0' && c <= '7')
                        {
                            state = STATE_OCTAL;
                            GetHexValue(c, out escapedCodePoint, ref escapeLength);
                            // not a typo; all 1-char octal values are also the same in hex
                        }
                        else if (c == 'x')
                        {
                            state = STATE_HEX;
                        }
                        else if (c == 'u' || c == 'U')
                        {
                            throw new NotSupportedException("Unicode escape points: on my todo list");
                        }
                        else
                        {
                            state = STATE_NORMAL;
                            AppendEscaped(ms, c);
                        }
                        break;
                    case STATE_OCTAL:
                        if (c >= '0' && c <= '7')
                        {
                            uint x;
                            GetHexValue(c, out x, ref escapeLength);
                            escapedCodePoint = (escapedCodePoint << 3) | x;
                            if (escapeLength == 3)
                            {
                                AppendByte(ms, ref escapedCodePoint, ref escapeLength);
                                state = STATE_NORMAL;
                            }
                        }
                        else
                        {
                            // not an octal char - append what we have, then reprocess this char
                            AppendByte(ms, ref escapedCodePoint, ref escapeLength);
                            state = STATE_NORMAL;
                            goto case STATE_NORMAL;
                        }
                        break;
                    case STATE_HEX:
                        {
                            uint x;
                            if (GetHexValue(c, out x, ref escapeLength))
                            {
                                escapedCodePoint = (escapedCodePoint << 4) | x;
                                if (escapeLength == 2)
                                {
                                    AppendByte(ms, ref escapedCodePoint, ref escapeLength);
                                    state = STATE_NORMAL;
                                }
                            }
                            else
                            {
                                // not a hex char - regular append
                                if (escapeLength == 0)
                                {
                                    // include the malformed \x
                                    AppendAscii(ms, @"\x");
                                }
                                else
                                {
                                    AppendByte(ms, ref escapedCodePoint, ref escapeLength);
                                }
                                state = STATE_NORMAL;
                                goto case STATE_NORMAL;
                            }
                        }
                        break;
                    case STATE_NORMAL:
                        if (c == '\\')
                        {
                            state = STATE_ESCAPE;
                        }
                        else
                        {
                            uint codePoint = (uint)c;
                            int len = 1;
                            AppendNormalized(ms, ref codePoint, ref len);
                        }
                        break;
                    default:
                        throw new InvalidOperationException();
                }
            }
            // append any trailing escaped data
            AppendByte(ms, ref escapedCodePoint, ref escapeLength);
        }

        internal static bool ConsumeBoolean(this Peekable<Token> tokens)
        {
            var token = tokens.Read();
            token.Assert(TokenType.AlphaNumeric);
            tokens.Consume();

            if (string.Equals("true", token.Value, StringComparison.OrdinalIgnoreCase)) return true;
            if (string.Equals("false", token.Value, StringComparison.OrdinalIgnoreCase)) return false;
            throw token.Throw("Unable to parse boolean");
        }

        static TokenType Identify(char c)
        {
            if (c == '"' || c == '\'') return TokenType.StringLiteral;
            if (char.IsWhiteSpace(c)) return TokenType.Whitespace;
            if (char.IsLetterOrDigit(c)) return TokenType.AlphaNumeric;
            switch (c)
            {
                case '_':
                case '.':
                case '-':
                    return TokenType.AlphaNumeric;
            }
            return TokenType.Symbol;
        }

        public static IEnumerable<Token> RemoveCommentsAndWhitespace(this IEnumerable<Token> tokens)
        {
            int commentLineNumber = -1;
            bool isBlockComment = false;
            foreach (var token in tokens)
            {
                if (isBlockComment)
                {
                    // swallow everything until the end of the block comment
                    if (token.Is(TokenType.Symbol, "*/"))
                        isBlockComment = false;
                }
                else if (commentLineNumber == token.LineNumber)
                {
                    // swallow everything else on that line
                }
                else if (token.Is(TokenType.Whitespace))
                {
                    continue;
                }
                else if (token.Is(TokenType.Symbol, "//"))
                {
                    commentLineNumber = token.LineNumber;
                }
                else if (token.Is(TokenType.Symbol, "/*"))
                {
                    isBlockComment = true;
                }
                else
                {
                    yield return token;
                }
            }
        }

        // symbols only combine when they form "//", "/*" or "*/"
        static bool CanCombine(TokenType type, int len, char prev, char next)
            => type != TokenType.Symbol
            || (len == 1 && prev == '/' && (next == '/' || next == '*'))
            || (len == 1 && prev == '*' && next == '/');
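        // Rough sketch of the tokenizer below on a single line (illustrative only):
        //
        //   optional int32 id = 1; // comment
        //
        // Tokenize yields AlphaNumeric("optional"), Whitespace, AlphaNumeric("int32"), Whitespace,
        // AlphaNumeric("id"), Whitespace, Symbol("="), Whitespace, AlphaNumeric("1"), Symbol(";"),
        // Whitespace, Symbol("//"), Whitespace, AlphaNumeric("comment"); RemoveCommentsAndWhitespace
        // then drops the whitespace tokens and everything after the "//" on that line, leaving the
        // six meaningful tokens for the parser.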
        public static IEnumerable<Token> Tokenize(this TextReader reader, string file)
        {
            var buffer = new StringBuilder();

            int lineNumber = 0, offset = 0;
            string line;
            string lastLine = null;
            while ((line = reader.ReadLine()) != null)
            {
                lastLine = line;
                lineNumber++;
                int columnNumber = 0, tokenStart = 1;
                char lastChar = '\0', stringType = '\0';
                TokenType type = TokenType.None;
                bool isEscaped = false;
                foreach (char c in line)
                {
                    columnNumber++;
                    if (type == TokenType.StringLiteral)
                    {
                        if (c == stringType && !isEscaped)
                        {
                            yield return new Token(buffer.ToString(), lineNumber, tokenStart, type, line, offset++, file);
                            buffer.Clear();
                            type = TokenType.None;
                        }
                        else
                        {
                            buffer.Append(c);
                            isEscaped = !isEscaped && c == '\\'; // ends an existing escape or starts a new one
                        }
                    }
                    else
                    {
                        var newType = Identify(c);
                        if (newType == type && CanCombine(type, buffer.Length, lastChar, c))
                        {
                            buffer.Append(c);
                        }
                        else
                        {
                            if (buffer.Length != 0)
                            {
                                yield return new Token(buffer.ToString(), lineNumber, tokenStart, type, line, offset++, file);
                                buffer.Clear();
                            }
                            type = newType;
                            tokenStart = columnNumber;
                            if (newType == TokenType.StringLiteral)
                            {
                                stringType = c;
                            }
                            else
                            {
                                buffer.Append(c);
                            }
                        }
                    }
                    lastChar = c;
                }
                if (buffer.Length != 0)
                {
                    yield return new Token(buffer.ToString(), lineNumber, tokenStart, type, lastLine, offset++, file);
                    buffer.Clear();
                }
            }
        }

        internal static bool TryParseSingle(string token, out float val)
        {
            if (token == "nan")
            {
                val = float.NaN;
                return true;
            }
            if (token == "inf")
            {
                val = float.PositiveInfinity;
                return true;
            }
            if (token == "-inf")
            {
                val = float.NegativeInfinity;
                return true;
            }
            return float.TryParse(token, NumberStyles.Number | NumberStyles.AllowExponent, CultureInfo.InvariantCulture, out val);
        }

        internal static bool TryParseDouble(string token, out double val)
        {
            if (token == "nan")
            {
                val = double.NaN;
                return true;
            }
            if (token == "inf")
            {
                val = double.PositiveInfinity;
                return true;
            }
            if (token == "-inf")
            {
                val = double.NegativeInfinity;
                return true;
            }
            return double.TryParse(token, NumberStyles.Number | NumberStyles.AllowExponent, CultureInfo.InvariantCulture, out val);
        }
    }
}