Expression Trees

Generic List converter to multidimensional (2D) string array (for excel data population)

Posted on Updated on

This post is still related to the excel data population that I posted 2 weeks back where I concluded that the most performant way for the data population is using multidimensional string array (multi cells & rows).

Let’s first take a look at the SampleObject class that we use to populate the data into excel

class SampleObject
{
    public string A { get; set; }
    public string B { get; set; }
    public string C { get; set; }
    public string D { get; set; }
    public string E { get; set; }
}

Now in the previous post, the data population is being done “statically” compiled, means it only supports for that exact SampleObject type/class.

static void PopulateItemsWithMultiCellsAndRowsMethod(Worksheet worksheet, IList<SampleObject> items)
{
    LogFormat("Start populating {0} rows using multi cells and rows method", items.Count.ToString());
    var stringArray = new string[items.Count, 5];
    for (int i = 0; i < items.Count; i++)
    {
        var item = items[i];
        var rowIndex = i + 1;
        stringArray[i, 0] = item.A;
        stringArray[i, 1] = item.B;
        stringArray[i, 2] = item.C;
        stringArray[i, 3] = item.D;
        stringArray[i, 4] = item.E;
    }
 
    var r = worksheet.get_Range(string.Format("A{0}:E{1}", 1, items.Count));
    r.Value = stringArray;
    DisposeCOMObject(r);
}

The above will work just fine until there are other “SampleObjectX”s that need to be supported, so it will be more convenient if we can “dynamically” do this for any given objects.

One obvious answer will be using Reflection but there is also another alternative which is generating dynamic methods using Expression Tree. There are pros and cons between the 2 options above (nicely explained by this post).

In below code snippet, I summarized the 3 approaches

1. Statically

string[,] ConvertTo2DStringArrayStatically(IList<SampleObject> items)
{   
    var stringArray = new string[items.Count, 5];
    for (int i = 0; i < items.Count; i++)
    {
        var item = items[i];
        stringArray[i, 0] = item.A;
        stringArray[i, 1] = item.B;
        stringArray[i, 2] = item.C;
        stringArray[i, 3] = item.D;
        stringArray[i, 4] = item.E;
    }
    return stringArray;
}

2. With reflection

string[,] ConvertTo2DStringArrayWithReflection<T>(IList<T> items)
{
    var props = typeof(T).GetProperties();
    var stringArray = new string[items.Count, props.Length];

    for (int i = 0; i < items.Count; i++)
    {
        var item = items[i];
        for (int iProp = 0; iProp < props.Length; iProp++)
        {
            stringArray[i, iProp] = props[iProp].GetValue(item).ToString();
        }
    }
    return stringArray;
}

3. With Expression Tree (no cache is added intentionally for brevity purpose)

Func<IList<T>, string[,]> ConvertTo2DStringArrayWithExpressionTree<T>()
{
    var t = typeof(T);
    var tProperties = t.GetProperties();

    var statements = new List<Expression>();

    var listInputParameter = Expression.Parameter(typeof(IList<T>));
    var itemPropertiesCount = Expression.Constant(tProperties.Length);
    var rowCountExpression = Expression.Property(listInputParameter, typeof(ICollection<T>), "Count");
    var arrayIndexVariable = Expression.Variable(typeof(int));
    var currentItem = Expression.Variable(t);
    
    statements.Add(Expression.Call(typeof(Console).GetMethod("WriteLine", new Type[] { typeof(string), typeof(string) })
                    , Expression.Constant("Converting {0} rows with expression tree")
                    , Expression.Call(rowCountExpression, typeof(object).GetMethod("ToString"))));

    // Initialize string array based on the items row count and the 
    var stringArrayVariable = Expression.Variable(typeof(string[,]));
    var newArrayExpression = Expression.NewArrayBounds(typeof(string), rowCountExpression, itemPropertiesCount);
    var initializeArray = Expression.Assign(stringArrayVariable, newArrayExpression);
    statements.Add(initializeArray);

    // Prepare item assignments here as it requires loop on the properties which can't be done inline
    var itemStatements = new List<Expression>();
    itemStatements.Add(Expression.Assign(currentItem, Expression.Property(listInputParameter, "Item", arrayIndexVariable)));
    for (int pIndex = 0; pIndex < tProperties.Length; pIndex++)
    {
        var propExpression = Expression.Property(currentItem, tProperties[pIndex]);
        itemStatements.Add(Expression.Assign(Expression.ArrayAccess(stringArrayVariable, new List<Expression> { arrayIndexVariable, Expression.Constant(pIndex) }), propExpression));
    }
    itemStatements.Add(Expression.PostIncrementAssign(arrayIndexVariable));

    // iterate the items
    var label = Expression.Label();
    var forLoopBody = Expression.Block(
        new[] { arrayIndexVariable }, // local variable
        Expression.Assign(arrayIndexVariable, Expression.Constant(0)), // initialize with 0
        Expression.Loop(
            Expression.IfThenElse(
                Expression.LessThan(arrayIndexVariable, rowCountExpression), // test
                Expression.Block(new[] { currentItem }, itemStatements), // execute if true
                Expression.Break(label)) // execute if false
            , label));
    statements.Add(forLoopBody);

    // return statement
    statements.Add(stringArrayVariable); 

    var body = Expression.Block(stringArrayVariable.Type, new[] { stringArrayVariable }, statements.ToArray());
    var compiled = Expression.Lambda<Func<IList<T>, string[,]>>(body, listInputParameter).Compile();
    return compiled;
}

the expression tree “body” in DebugView

.Block(System.String[,] $var1) {
    .Call System.Console.WriteLine("Converting {0} rows with expression tree", .Call ($var2.Count).ToString());
    $var1 = .NewArray System.String[$var2.Count,5];
    .Block(System.Int32 $var3) {
        $var3 = 0;
        .Loop  {
            .If ($var3 < $var2.Count) {
                .Block(ExcelDataPopulator.SampleObject $var4) {
                    $var4 = $var2.Item[$var3];
                    $var1[$var3,0] = $var4.A;
                    $var1[$var3,1] = $var4.B;
                    $var1[$var3,2] = $var4.C;
                    $var1[$var3,3] = $var4.D;
                    $var1[$var3,4] = $var4.E;
                    $var3++
                }
            } .Else {
                .Break #Label1 { }
            }
        }
        .LabelTarget #Label1:
    };
    $var1
}

From the performance benchmark side,

var sw = Stopwatch.StartNew();

var items = Enumerable.Range(1, 1000000).Select(i => new SampleObject { A = "A" + i, B = "B" + i, C = "C" + i, D = "D" + i, E = "E" + i }).ToList();
string[,] stringArray = null;

LogFormat("Initialized {0} rows, {1} ms", items.Count.ToString(), sw.ElapsedMilliseconds.ToString());
sw.Restart();

for (int i = 0; i < 10; i++)
    stringArray = ConvertTo2DStringArrayStatically(items);

LogFormat("Convert with statically is completed in {0} ms", sw.ElapsedMilliseconds.ToString());
sw.Restart();

for (int i = 0; i < 10; i++)
    stringArray = ConvertTo2DStringArrayWithReflection(items);

LogFormat("Convert with reflection is completed in {0} ms", sw.ElapsedMilliseconds.ToString());
sw.Restart();

for (int i = 0; i < 10; i++)
    stringArray = ConvertTo2DStringArrayWithExpressionTree<SampleObject>()(items);

LogFormat("Convert with expression tree is completed in {0} ms", sw.ElapsedMilliseconds.ToString());

// log output:
//Initialized 1,000,000 rows, 1,747 ms
//Convert with statically is completed in 1,077 ms
//Convert with reflection is completed in 10,915 ms
//Convert with expression tree is completed in 1,617 ms

So conclusions:

  1. statically will always be the most performant but maintainability is a bit of a challenging if we have many objects/types to support.
  2. reflection is 10x slower but the code is much simpler
  3. dynamic method via expression tree is having more code to build the expressions but the performance is only slightly slower than statically compiled option.

There are couple of more options available that I didn’t cover above:

  1. Code generation using class metadata/reflection
  2. Reflection.Emit

Full source code is available at https://github.com/bembengarifin/ExcelDataPopulator)

Advertisements

Generate Func by using Expression Trees

Posted on Updated on

I stumbled in a code change today which involved some value look-up in a dictionary based on some property name which is by convention (paramA + paramB + “property”).

So there was used to be a pre-calculated sum stored in the dictionary by a user control.

class SomeClass
{
    public Decimal DailyFunctionalAmount { get; set; }
    public Decimal MonthToDateFunctionalAmount { get; set; }
    ///..... have much more here
}

Decimal GetTotal(String fieldName, IDictionary<String, Decimal> lookups)
{
    // used to be as simple as getting the pre-calculated sum via dictionary (populated somewhere else by control)
    var totalByLookup = lookups[fieldName];
    return totalByLookup;
}

Now, I need to rely on the underlying data rows to calculate the sum.

var lists = new List<SomeClass> {
    new SomeClass { DailyFunctionalAmount = 10, MonthToDateFunctionalAmount = 100},
    new SomeClass { DailyFunctionalAmount = 10, MonthToDateFunctionalAmount = 100},
};

// I could not use the following code as this is compiled/pre-determined code where we actually need the selector in run-time based on the generated property name by convention
var totalByLinq = lists.Sum(r => r.DailyFunctionalAmount); 

Luckily, there is this expression trees which seems to be perfect for the job, rather than a nasty static function returning selector based on possible combinations for the property name by convention.

// returns -> dtoType => dtoType.propertyName
Func<SomeClass, Decimal> GetMyFunc(string propertyName)
{
    var dtoTypeParameter = Expression.Parameter(typeof(SomeClass), "dtoType");
    var dtoPropertySelector = Expression.Property(dtoTypeParameter, propertyName);
    var lamdaExpression = Expression.Lambda<Func<SomeClass, Decimal>>(dtoPropertySelector, new ParameterExpression[] { dtoTypeParameter });

    return lamdaExpression.Compile();
}

Decimal GetTotal(String fieldName, IEnumerable<SomeClass> lists)
{
    // solution - generate the func in run-time
    var runTimeSelector = GetMyFunc(fieldName);
    var totalByLinq = lists.Sum(runTimeSelector);
    return totalByLinq;
}

It’s surely world of possibilities with this expression trees ;)

Full source code below:

[TestClass]
public class UnitTest1
{
    [TestMethod]
    public void TestMethod1()
    {
        var lists = new List<SomeClass> {
            new SomeClass { DailyFunctionalAmount = 10, MonthToDateFunctionalAmount = 100},
            new SomeClass { DailyFunctionalAmount = 10, MonthToDateFunctionalAmount = 100},
        };

        var lookups = new Dictionary<String, Decimal>(); // stored with the property name as key in the dictionary and the sum as value
        lookups.Add("DailyFunctionalAmount", 20);
        lookups.Add("MonthToDateFunctionalAmount", 200);

        var dailyFunctionalFieldName = GetFieldName("Daily", "Functional");
        var monthToDateFunctionalFieldName = GetFieldName("MonthToDate", "Functional");

        // old way
        Assert.AreEqual(20, GetTotal(dailyFunctionalFieldName, lookups));
        Assert.AreEqual(200, GetTotal(monthToDateFunctionalFieldName, lookups));

        // new way 
        Assert.AreEqual(20, GetTotal(dailyFunctionalFieldName, lists));
        Assert.AreEqual(200, GetTotal(monthToDateFunctionalFieldName, lists));
    }

    Decimal GetTotal(String fieldName, IDictionary<String, Decimal> lookups)
    {
        // used to be as simple as getting the pre-calculated sum via dictionary (populated somewhere else by control)
        var totalByLookup = lookups[fieldName];
        return totalByLookup;
    }

    Decimal GetTotal(String fieldName, IEnumerable<SomeClass> lists)
    {
        // now it needs to be manually calculated from the list

        //var totalByLinq = lists.Sum(r => r.DailyFunctionalAmount); // but this is compiled/pre-determined code where we actually need the selector in run-time

        // solution - generate the func in run-time
        var runTimeSelector = GetMyFunc(fieldName);
        var totalByLinq = lists.Sum(runTimeSelector);
        return totalByLinq;
    }

    String GetFieldName(String periodic, String currency)
    {
        // the field name is generated by some convention [Daily/MonthToDate] + [Functional/Transaction] + "Amount"
        return periodic + currency + "Amount";
    }

    // returns -> dtoType => dtoType.propertyName
    Func<SomeClass, Decimal> GetMyFunc(string propertyName)
    {
        var dtoTypeParameter = Expression.Parameter(typeof(SomeClass), "dtoType");
        var dtoPropertySelector = Expression.Property(dtoTypeParameter, propertyName);
        var lamdaExpression = Expression.Lambda<Func<SomeClass, Decimal>>(dtoPropertySelector, new ParameterExpression[] { dtoTypeParameter });

        return lamdaExpression.Compile();
    }
}

class SomeClass
{
    public Decimal DailyFunctionalAmount { get; set; }
    public Decimal MonthToDateFunctionalAmount { get; set; }
    ///..... have much more here
}