@smitpatel ran a few custom batching benchmarks recently that showed interesting results. His benchmark focuses on INSERT batching for the same entity, but a similar analysis could be performed for other operations.
The test inserts 1000 rows into a table in SQL Server. There are two separate series, one for a local instance and one for a remote instance. The chart shows the elapsed time (on Y) for different batch sizes (on X):

To make sense of this chart, keep in mind that at a maximum batch size of 1 there is no batching, and we resort to different SQL that has a smaller fixed cost.
There are a few interesting things to observe:
Possible conclusions and follow-up actions:
The test code follows:
```csharp
using System;
using System.Collections.Generic;
using System.Diagnostics;
using Microsoft.EntityFrameworkCore;

namespace ConsoleApp6
{
    class Program
    {
        static void Main(string[] args)
        {
            new Test().Run();
        }
    }

    public class Test
    {
        protected OrdersContext Context;
        protected string ConnectionString
            = "Server=(localdb)\\mssqllocaldb;Database=Perf_UpdatePipeline_Simple;Trusted_Connection=True";

        public void Run()
        {
            var stopwatch = new Stopwatch();
            for (var i = 0; i < 150; i++)
            {
                // Run each batch size twice; the first pass is a warmup whose timing is discarded.
                for (var j = 0; j < 2; j++)
                {
                    Context = CreateContext(i);
                    var transaction = Context.Database.BeginTransaction();
                    var customers = CreateCustomers(1000, setPrimaryKeys: false);
                    Context.Customers.AddRange(customers);

                    stopwatch.Restart();
                    Context.SaveChanges();
                    stopwatch.Stop();

                    if (j != 0)
                    {
                        Console.WriteLine($"BatchSize={i}=>{stopwatch.Elapsed}");
                    }

                    // Disposing the transaction without committing rolls the inserts back,
                    // so every iteration starts from the same table state.
                    transaction.Dispose();
                    Context.Dispose();
                }
            }
        }
        public virtual OrdersContext CreateContext(int batchSize)
            => new OrdersContext(ConnectionString, batchSize);

        public class OrdersContext : DbContext
        {
            private readonly string _connectionString;
            private readonly bool _disableBatching;
            private readonly int _batchSize;

            public OrdersContext(string connectionString, bool disableBatching = false)
            {
                _connectionString = connectionString;
                _disableBatching = disableBatching;
            }

            public OrdersContext(string connectionString, int batchSize)
            {
                _connectionString = connectionString;
                _batchSize = batchSize;
            }

            public DbSet<Customer> Customers { get; set; }
            public DbSet<Order> Orders { get; set; }

            protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
                => optionsBuilder
                    .UseSqlServer(
                        _connectionString,
                        b =>
                        {
                            // A MaxBatchSize of 1 effectively disables batching;
                            // a batchSize of 0 leaves the provider default (no limit) in place.
                            if (_disableBatching)
                            {
                                b.MaxBatchSize(1);
                            }
                            else if (_batchSize != 0)
                            {
                                b.MaxBatchSize(_batchSize);
                            }
                        });
        }
        public virtual List<Customer> CreateCustomers(int customerCount, bool setPrimaryKeys)
        {
            var customers = new List<Customer>();
            for (var c = 0; c < customerCount; c++)
            {
                customers.Add(
                    new Customer
                    {
                        CustomerId = setPrimaryKeys ? c + 1 : 0,
                        Title = c % 2 == 0 ? "Mr" : "Mrs",
                        FirstName = "Customer " + c,
                        LastName = "Customer " + c,
                        DateOfBirth = new DateTime(1980, c % 12 + 1, 1),
                        IsLoyaltyMember = c % 3 == 0,
                        Joined = new DateTime(2000, c % 12 + 1, 1),
                        OptedOutOfMarketing = c % 7 == 0,
                        Phone = "555-555-5555",
                        Email = $"customer{c}@sample.com",
                        AddressLineOne = $"{c} Sample St",
                        City = "Sampleville",
                        StateOrProvince = "SMP",
                        ZipOrPostalCode = "00000",
                        Country = "United States"
                    });
            }
            return customers;
        }

        public virtual List<Order> CreateOrders(List<Customer> customers, int ordersPerCustomer, bool setPrimaryKeys)
        {
            var orders = new List<Order>();
            for (var c = 0; c < customers.Count; c++)
            {
                for (var i = 0; i < ordersPerCustomer; i++)
                {
                    orders.Add(
                        new Order
                        {
                            OrderId = setPrimaryKeys ? c * ordersPerCustomer + i + 1 : 0,
                            CustomerId = customers[c].CustomerId,
                            Date = new DateTime(2000, 1, 1),
                            OrderDiscount = i % 3,
                            DiscountReason = i % 3 == 0 ? null : "They seemed nice",
                            Tax = i % 10,
                            Addressee = "Person " + i,
                            AddressLineOne = $"{i} Sample St",
                            City = "Sampleville",
                            StateOrProvince = "SMP",
                            ZipOrPostalCode = "00000",
                            Country = "United States"
                        });
                }
            }
            return orders;
        }
        public class Order
        {
            public int OrderId { get; set; }
            public DateTime Date { get; set; }
            public string SpecialRequests { get; set; }
            public decimal OrderDiscount { get; set; }
            public string DiscountReason { get; set; }
            public decimal Tax { get; set; }
            public string Addressee { get; set; }
            public string AddressLineOne { get; set; }
            public string AddressLineTwo { get; set; }
            public string City { get; set; }
            public string StateOrProvince { get; set; }
            public string ZipOrPostalCode { get; set; }
            public string Country { get; set; }
            public int CustomerId { get; set; }
            public Customer Customer { get; set; }
        }

        public class Customer
        {
            public int CustomerId { get; set; }
            public string Title { get; set; }
            public string FirstName { get; set; }
            public string LastName { get; set; }
            public DateTime? DateOfBirth { get; set; }
            public bool IsLoyaltyMember { get; set; }
            public DateTime Joined { get; set; }
            public bool OptedOutOfMarketing { get; set; }
            public string Phone { get; set; }
            public string Email { get; set; }
            public string AddressLineOne { get; set; }
            public string AddressLineTwo { get; set; }
            public string City { get; set; }
            public string StateOrProvince { get; set; }
            public string ZipOrPostalCode { get; set; }
            public string Country { get; set; }
            public ICollection<Order> Orders { get; set; }
        }
    }
}
```
cc @smitpatel @AndriySvyryd @anpete
cc @roji since we recently talked about similar observations.
What is the default for the SQL Server MaxBatchSize option?
@ErikEJ No limit, which in practice means we split to a new batch every 2100 parameters.
For this test it ends up being about 130.
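As a back-of-the-envelope check, the implicit batch size is roughly SQL Server's 2100-parameter limit divided by the parameters bound per row. A minimal sketch, assuming ~16 parameters per Customer row (roughly one per mapped column in the entity above):

```csharp
// SQL Server allows at most 2100 parameters per command, so with no
// MaxBatchSize configured the update pipeline splits on that limit.
const int sqlServerMaxParameters = 2100;
const int parametersPerRow = 16; // assumption: ~one per mapped Customer column

var effectiveBatchSize = sqlServerMaxParameters / parametersPerRow;
Console.WriteLine(effectiveBatchSize); // 131, in line with the ~130 observed
```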
Hmmm... I always told people you had a default of 1000... There you go. I would strongly suggest that anyone doing mass inserts lower it.
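For anyone looking for the knob, capping the batch size is a one-liner in the provider options. A minimal sketch (the context name and the cap of 100 are illustrative, not a recommendation):

```csharp
using Microsoft.EntityFrameworkCore;

public class MassInsertContext : DbContext
{
    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
        => optionsBuilder.UseSqlServer(
            "Server=(localdb)\\mssqllocaldb;Database=Benchmarks;Trusted_Connection=True",
            sqlOptions => sqlOptions.MaxBatchSize(100)); // hypothetical cap for mass inserts
}
```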
Interesting stuff; it would be great to be able to run this benchmark as-is on PostgreSQL.
I think I ran something similar a while ago and found simpler results, i.e. that batching only improves performance (or at least doesn't degrade it), but it would be good to make sure.
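The provider swap should be mostly mechanical, since MaxBatchSize lives on the shared relational options builder. A minimal, untested sketch of the OnConfiguring variant, assuming the Npgsql.EntityFrameworkCore.PostgreSQL package:

```csharp
protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
    => optionsBuilder
        .UseNpgsql(
            _connectionString,
            b =>
            {
                if (_batchSize != 0)
                {
                    b.MaxBatchSize(_batchSize); // same relational option as on SQL Server
                }
            });
```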
I updated the code. It sets different batch sizes and runs each one twice (the first run is ignored as warmup).
Team decision: default size = 'E' + 'F'
Also consider https://github.com/aspnet/EntityFrameworkCore/pull/10091#issuecomment-406822986
Let's set it to ~40.
Forty-two.
Just forty-two.
4 then 2.
I have a multithreaded application that needs to insert records in bulk into the same table from different places. After running continuously for 2 hours, my application gets a connection timeout.
_Stack Trace_
System.Data.SqlClient.SqlException (0x80131904): Timeout expired. The timeout period elapsed prior to completion of the operation or the server is not responding. ---> System.ComponentModel.Win32Exception (258): The wait operation timed out
at System.Data.SqlClient.SqlConnection.OnError(SqlException exception, Boolean breakConnection, Action`1 wrapCloseInAction)
at System.Data.SqlClient.TdsParser.ThrowExceptionAndWarning(TdsParserStateObject stateObj, Boolean callerHasConnectionLock, Boolean asyncClose)
at System.Data.SqlClient.TdsParser.TryRun(RunBehavior runBehavior, SqlCommand cmdHandler, SqlDataReader dataStream, BulkCopySimpleResultSet bulkCopyHandler, TdsParserStateObject stateObj, Boolean& dataReady)
at System.Data.SqlClient.SqlDataReader.TryConsumeMetaData()
at System.Data.SqlClient.SqlDataReader.get_MetaData()
I suspect this may be causing a deadlock on a particular table. I verified in SQL Server that the table is deadlocked.
I have tried the following:
- When running the multithreaded application, it says the MSDTC server is unavailable. Note: I started the Distributed Transaction Coordinator service, but I still get an error that the environment does not support distributed transactions.
- I tried SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED, but I still get the same issue after 2 hours of continuous running.
- I added SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED between BeginTransaction and Commit in my C# code, but no luck.
Could someone please help me overcome this?
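(Side note: EF Core can begin a transaction at an explicit isolation level rather than issuing SET TRANSACTION ISOLATION LEVEL as raw SQL. A minimal sketch, assuming a hypothetical MyContext, and leaving aside whether READ UNCOMMITTED is the right fix here:)

```csharp
using System.Data;
using Microsoft.EntityFrameworkCore;

using (var context = new MyContext())
using (var transaction = context.Database.BeginTransaction(IsolationLevel.ReadUncommitted))
{
    // ... add entities and save within the read-uncommitted transaction ...
    context.SaveChanges();
    transaction.Commit();
}
```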
@gopal-k please open a separate issue; your problem isn't really related to this one, and discussion can continue there.
This also probably belongs on Stack Overflow rather than here, as it doesn't seem related to Entity Framework in any way.
Set it to 42!
And update the XML documentation