Grouping

The task

Tuesday, December 16, 2003

To produce generic solution for arranging of any kind of data in a list into a tree structure, flexibly grouped on properties of the list's elements.

The code

There are a few imports that will make your life easier. Use them as needed.

using System;
using System.Collections;
using System.IO;
using System.Reflection;

Library code

Data structures

The grouper arranges a list's items in sub lists on a tree structure. The tree's nodes hold some information about each group or sub group.

public class CGroupNode {

    private string m_PropertyName;
    private object m_PropertyValue;
    private NGroupNodes m_ChildNodes;
    private ArrayList m_Values;

    public CGroupNode(string sPropName, object propValue){
        m_PropertyName = sPropName;
        m_PropertyValue = propValue;
        m_ChildNodes = new NGroupNodes();
        m_Values = new ArrayList();
    }

    public string PropertyName{
        get{
            return m_PropertyName;
        }
    }
    public object PropertyValue{
        get{
            return m_PropertyValue;
        }
    }
    public NGroupNodes ChildNodes{
        get{
            return m_ChildNodes;
        }
    }
    public ArrayList Values{
        get{
            return m_Values;
        }
    }

    public override string ToString(){
        if(null == m_PropertyName || null == m_PropertyValue){
            return "(All)";
        }else{
            return m_PropertyName + ": " + m_PropertyValue.ToString();
        }
    }

} // class CGroupNode

public class NGroupNodes : CollectionBase {

    public NGroupNodes(){
    }

    public CGroupNode this[int index]{
        get{
            return (CGroupNode) this.InnerList[index];
        }
    }

    public void Add(CGroupNode node){
        this.InnerList.Add(node);
    }

} // class NGroupNodes

Grouping algorithm

By default, the grouper uses reflection to obtain the values from properties that the user specifies. If the list's elements are all of the same type, there is some optimization in obtaining the property infos only once. Note, however, that derived classes have much better optimization options (while reusing the grouping algorithm), as shown further below.

The group level is there to say: sort by all the keys, but only group by the top n keys.

public class CGrouper : IComparer {

    private string[] m_KeyProps;
    private int m_GroupLevel;

    public CGrouper(params string[] keyProps){
        m_KeyProps = keyProps;
        m_GroupLevel = keyProps.Length;
    }
    public CGrouper(int groupLevel, params string[] keyProps){
        if(groupLevel < 0 || groupLevel > keyProps.Length){
            throw new ArgumentException();
        }
        m_KeyProps = keyProps;
        m_GroupLevel = groupLevel;
    }

    public string[] KeyProperties{
        get{
            return m_KeyProps;
        }
    }
    public int GroupLevel{
        get{
            return m_GroupLevel;
        }
    }

    public NGroupNodes Group(IList list){
        return this.Group(list, false);
    }

    public NGroupNodes Group(IList list, bool fSingleType){

        NGroupNodes topNodes = new NGroupNodes();

        if(list.Count == 0){
            return topNodes;
        }

        if(m_GroupLevel == 0){
            CGroupNode only = new CGroupNode(null, null);
            only.Values.AddRange(list);
            topNodes.Add(only);
            return topNodes;
        }

        bool isFirst = true;
        object[] values = new object[m_GroupLevel];
        object[] lastValues = new object[m_GroupLevel];
        CGroupNode[] nodes = new CGroupNode[m_GroupLevel];
        PropertyInfo[] props = null;

        if(fSingleType){
            Type tp = list[0].GetType();
            props = new PropertyInfo[m_GroupLevel];
            for(int i = 0; i < m_GroupLevel; i++){
                props[i] = tp.GetProperty(m_KeyProps[i]);
            }
        }

        this.Sort(list);

        foreach(object o in list){

            bool hasChanged = false;
            CGroupNode node = nodes[nodes.Length - 1];

            for(int i = 0; i < nodes.Length; i++){

                values[i] = this.GetValue(o, i, props);

                if(isFirst
                || hasChanged
                || 0 != this.CompareValues(values[i], lastValues[i], m_KeyProps[i], true)){

                    node = this.CreateNode(m_KeyProps[i], values[i]);

                    nodes[i] = node;
                    if(0 == i){
                        topNodes.Add(node);
                    }else{
                        nodes[i - 1].ChildNodes.Add(node);
                    }

                    hasChanged = true;
                }
                lastValues[i] = values[i];
            }

            node.Values.Add(o);

            isFirst = false;
        }

        return topNodes;
    }

    public void Sort(IList list){
        ArrayList.Adapter(list).Sort(this);
    }

    protected virtual object GetValue(object o, int keyIndex, PropertyInfo[] props){
        // overriders should ignore "props"
        PropertyInfo pi = null;
        if(props != null){
            pi = props[keyIndex];
        }else{
            Type tp = o.GetType();
            pi = tp.GetProperty(m_KeyProps[keyIndex]);
        }
        return pi.GetValue(o, null);
    }

    public virtual int Compare(object a, object b){
        Type tp1 = a.GetType();
        Type tp2 = b.GetType();
        int i = 0;
        foreach(string sProp in m_KeyProps){
            PropertyInfo pi1 = tp1.GetProperty(sProp);
            PropertyInfo pi2 = tp2.GetProperty(sProp);
            object v1 = pi1.GetValue(a, null);
            object v2 = pi2.GetValue(b, null);
            i = this.CompareValues(v1, v2, sProp, false);
            if(i != 0){
                break;
            }
        }
        return i;
    }

    protected virtual int CompareValues(object v1, object v2, string sProp, bool fIsGrouping){
        if(v1 is IComparable && v2 is IComparable){
            return ((IComparable)v1).CompareTo(v2);
        }else{
            return this.CompareCustom(v1, v2, sProp, fIsGrouping);
        }
    }

    protected virtual int CompareCustom(object v1, object v2, string sProp, bool fIsGrouping){
        return 0;
    }

    protected virtual CGroupNode CreateNode(string sPropName, object propValue){
        return new CGroupNode(sPropName, propValue);
    }

} // class CGrouper

Extension code

We can add new capabilities to the grouper, such as grouping on a partial value of some property (like an initial letter of a name) while sorting on that property within a group. If it's known up front, the CGrouper class enables inheritors to operate much more efficiently, too.

public class CFileNameGrouper : CGrouper {

    public CFileNameGrouper() : base("Name") {
    }

    protected override object GetValue(object o, int keyIndex, PropertyInfo[] props){
        return ((FileSystemInfo)o).Name;
    }

    public override int Compare(object a, object b){
        return string.Compare(((FileSystemInfo)a).Name, ((FileSystemInfo)b).Name, true);
    }

    protected override int CompareValues(object v1, object v2, string sProp, bool fIsGrouping){

        string s1 = v1.ToString();
        string s2 = v2.ToString();

        if(fIsGrouping){

            // version 1: initial letter
            if(char.ToLower(s1[0]) == char.ToLower(s2[0])){
                return 0;
            }else{
                return string.Compare(s1, s2, true);
            }

            // version 2: range of initial letters
            // ...

        }else{
            return string.Compare(s1, s2, true);
        }
    }

    protected override CGroupNode CreateNode(string sPropName, object propValue){
        return new CGroupNode("Names", char.ToUpper(propValue.ToString()[0]));
    }

} // class CFileNameGrouper

Driver code

Tracing code

Traces are your friend.

public partial class Test {

    private static void WriteGroups(NGroupNodes groups){
        foreach(CGroupNode node in groups){
            WriteGroups(node, 0);
        }
    }

    private static void WriteGroups(CGroupNode node, int level){
        WriteIndent(level);
        Console.WriteLine(node);
        foreach(CGroupNode childNode in node.ChildNodes){
            WriteGroups(childNode, level + 1);
        }
        foreach(object o in node.Values){
            WriteIndent(level + 1);
            Console.WriteLine(o);
        }
    }

    private static void WriteIndent(int level){
        for(int i = 0; i < level; i++){
            Console.Write("    ");
        }
    }

} // partial class Test

If you think the WriteIndent function is embarrassing, have a look at the Gregor.Core.NStringIndents class, a dynamically growing lookup table for indentation strings.

Testing the file name grouper

Why not make this into an enhanced ls command?

public partial class Test {

    public static void Main(string[] args){

        DirectoryInfo di = null;
        if(args.Length == 0){
            di = new DirectoryInfo(Environment.CurrentDirectory);
        }else{
            di = new DirectoryInfo(args[0]);
        }

        ArrayList list = new ArrayList();
        list.AddRange(di.GetFiles());

        CGrouper grouper = new CFileNameGrouper();
        NGroupNodes groups = grouper.Group(list);

        WriteGroups(groups);
    }

} // partial class Test

Testing the base grouper

This one lets you play some (just pass the relevant property names as command line arguments). If you encounter any bugs, let me know.

public partial class Test {

    public static void Main(string[] args){

        IList list = new ArrayList();

        list.Add(new CResource("File", "Work", "Sally"));
        list.Add(new CResource("Message", "Work", "Sally"));
        list.Add(new CResource("Message", "Private", "Sally"));
        list.Add(new CResource("File", "Private", "Sally"));
        list.Add(new CResource("Message", "Private", "Sally"));
        list.Add(new CResource("File", "Work", "Larry"));
        list.Add(new CResource("Message", "Work", "Larry"));
        list.Add(new CResource("File", "Work", "Sally"));
        list.Add(new CResource("Message", "Work", "Larry"));
        list.Add(new CResource("Message", "Private", "Larry"));
        list.Add(new CResource("File", "Private", "Larry"));
        list.Add(new CResource("Message", "Work", "Larry"));

        int level = args.Length;
        if(args.Length > 0 && args[0].StartsWith("/level:")){
            level = int.Parse(args[0].Substring(7));
            string[] args2 = new string[args.Length - 1];
            for(int i = 1; i < args.Length; i++){
                args2[i - 1] = args[i];
            }
            args = args2;
        }

        CGrouper grouper = new CGrouper(level, args);
        NGroupNodes groups = grouper.Group(list, true);

        WriteGroups(groups);
    }

} // partial class Test

public class CResource {

    private string m_Type;
    private string m_Category;
    private string m_Owner;

    public CResource(string sType, string sCategory, string sOwner){
        m_Type = sType;
        m_Category = sCategory;
        m_Owner = sOwner;
    }

    public string Type{
        get{
            return m_Type;
        }
    }
    public string Category{
        get{
            return m_Category;
        }
    }
    public string Owner{
        get{
            return m_Owner;
        }
    }

    public override string ToString(){
        return this.Type + "." + this.Category + "." + this.Owner;
    }

} // class CResource