Friday, 11 December 2009

String Parsing in C#

Regular expressions are powerful things, but they are difficult to learn when your main objective is to produce business-critical information systems and you must also keep up to date with the latest .NET features, the latest browsers and their quirks, the latest version of CSS, new JavaScript libraries... I think you get the picture. It's something else to learn. This is the reason we Google our parsing requirements and re-use existing regular expressions without actually understanding them.

I had a couple of requirements of my own and couldn't solve the more complex parsing with regular expressions. So, I've knocked up a light-weight string parser to satisfy my needs. I thought I'd share it with you. Here's the class in C#:

/// <summary>
/// A light-weight, forward-only cursor over a string.  The parser starts
/// positioned *before* the first char; call <see cref="Parse"/> to step onto
/// each char in turn.  All sub string matching is ordinal (char-by-char),
/// independent of the current culture.
/// </summary>
public class StringParser {

    private readonly string _s;
    private readonly int _len;
    private int _pos;
    private char _c;

    /// <summary>
    /// Creates a parser over the given string.
    /// </summary>
    /// <param name="s">The string to parse.  A null value is treated as an
    /// empty string so that every member is safe to call.</param>
    public StringParser(string s) {
        _s = s ?? string.Empty;
        _len = _s.Length;
        // Start one before the first char so the first Parse() lands on index 0.
        _pos = -1;
        // CurrentChar reports the first char even before Parse() is called.
        _c = (_len > 0) ? _s[0] : '\0';
    }

    /// <summary>
    /// Call to determine if there is still string to parse and to
    /// advance the char position by one.
    /// </summary>
    /// <returns>A boolean true if the position was advanced, otherwise
    /// false if the parser is already on the last char.</returns>
    public bool Parse() {
        if (_pos < _len - 1) {
            _c = _s[++_pos];
            return true;
        }
        return false;
    }

    /// <summary>
    /// Advances the parser by a given number of chars.
    /// </summary>
    /// <param name="places">The number of chars to advance by.</param>
    /// <exception cref="IndexOutOfRangeException">Thrown if the advance would
    /// push the position outside the parsing string.  The position is left
    /// unchanged in that case.</exception>
    public void AdvancePos(int places) {
        int npos = _pos + places;
        // Validate before mutating so a failed call cannot corrupt the parser.
        // The lower bound also catches int overflow of _pos + places.
        if (npos < 0 || npos >= _len) {
            throw new IndexOutOfRangeException();
        }
        _pos = npos;
        _c = _s[_pos];
    }

    /// <summary>
    /// Call to return the current char.
    /// </summary>
    public char CurrentChar {
        get { return _c; }
    }

    /// <summary>
    /// Call to check if parser is positioned on given sub string.
    /// </summary>
    /// <param name="sub">The sub string to check for at the current
    /// position.</param>
    /// <returns>A boolean true if sub string is found at current
    /// position, otherwise false.</returns>
    /// <exception cref="ArgumentNullException">Thrown if
    /// <paramref name="sub"/> is null.</exception>
    public bool IsParsing(string sub) {
        if (sub == null) {
            throw new ArgumentNullException("sub");
        }
        // Before the first Parse() the current char is the first char, so
        // compare from index 0 rather than from the -1 sentinel.
        int start = Math.Max(_pos, 0);
        // A sub string that ends exactly at the end of the input still fits.
        if (sub.Length > _len - start) {
            return false;
        }
        return string.CompareOrdinal(_s, start, sub, 0, sub.Length) == 0;
    }

    /// <summary>
    /// Call to read a portion/sub string of the string, from the current
    /// position up to the nearest delimiter found after the current
    /// position.  On success the position is moved onto the first char of
    /// that delimiter (whether or not the delimiter is included in the
    /// returned string).
    /// </summary>
    /// <param name="subArr">Represents a sub string, or multiple
    /// sub strings (delimiters) to read up to.  If multiple sub
    /// strings are passed then it is the sub string closest to
    /// the current position which is used; if two match at the same
    /// index, the one later in the array is used.</param>
    /// <param name="incl">If boolean true is passed then the
    /// delimiter will be included in the returned string.</param>
    /// <returns>The sub string, or null if none of the delimiters is found
    /// after the current position.</returns>
    /// <exception cref="ArgumentNullException">Thrown if
    /// <paramref name="subArr"/> is null.</exception>
    public string Read(string[] subArr, bool incl) {
        if (subArr == null) {
            throw new ArgumentNullException("subArr");
        }

        int nearest = _len;
        string delimiter = string.Empty;
        foreach (string candidate in subArr) {
            int found = _s.IndexOf(candidate, _pos + 1, StringComparison.Ordinal);
            if (found > -1 && found <= nearest) {
                nearest = found;
                delimiter = candidate;
            }
        }

        if (nearest <= _pos || nearest >= _len) {
            return null;
        }

        // Before the first Parse() the read starts at index 0, not at the
        // -1 sentinel (which Substring would reject).
        int start = Math.Max(_pos, 0);
        int count = (nearest - start) + (incl ? delimiter.Length : 0);
        string read = _s.Substring(start, count);

        _pos = nearest;
        _c = _s[_pos];
        return read;
    }

    /// <summary>
    /// Call to advance current position to the next occurrence of a given
    /// sub string after the current position.
    /// </summary>
    /// <param name="sub">The string to find and move to.</param>
    /// <param name="adv">If boolean true is passed and the string is found,
    /// the position is placed on the first char after the found string;
    /// otherwise it is placed on the first char of the found string.</param>
    /// <returns>A boolean true if the position was moved.  False if
    /// <paramref name="sub"/> is not found, or if <paramref name="adv"/> is
    /// true and the found string ends at the end of the input (there is no
    /// char after it to move to); the position is unchanged in both
    /// cases.</returns>
    /// <exception cref="ArgumentNullException">Thrown if
    /// <paramref name="sub"/> is null.</exception>
    public bool MoveTo(string sub, bool adv) {
        if (sub == null) {
            throw new ArgumentNullException("sub");
        }

        int found = _s.IndexOf(sub, _pos + 1, StringComparison.Ordinal);
        // Bail out before applying the offset: adding sub.Length to -1
        // would otherwise look like a valid position.
        if (found < 0) {
            return false;
        }

        int npos = adv ? found + sub.Length : found;
        if (npos > _pos && npos < _len) {
            _pos = npos;
            _c = _s[_pos];
            return true;
        }
        return false;
    }

    /// <summary>
    /// Skip forwards to the next non-space char, but only if currently
    /// positioned on a space.  If curr char is not a space then
    /// the position isn't advanced.  Only the ' ' char is skipped; tabs and
    /// line breaks are not.
    /// </summary>
    public void Squash() {
        while (_c == ' ' && Parse()) {
        }
    }
}

0 comments:

Post a Comment

My Posts Go Here

Asp.Net Tips

Avoiding Redirects

C#

CSS Tips

Design Patterns

Registry Pattern

Google Ads

SQL & Database Tips

.Net Config