This is the first way
- This is the first way
- SELECT t.*
- FROM
- ( SELECT u.PK1 ,u.PK2 FROM MyTable u
- WHERE u.PK1 is not null and u.PK2 is not null
- GROUP BY u.PK1 ,u.PK2
- HAVING COUNT(*) = 1 ) s
- LEFT JOIN MyTable t
- ON t.PK1=s.PK1 and t.PK1=s.PK2
- WHERE t.PK1 is not null and t.PK2 is not null
You can use inner join too, but in this scenario it is faster to use left join because it doesn't have to check every records for each row. Thanks to Anita for helping me with this query, being inspired too by this article written by Pinal Dave. By the way this query is not the only way, u can use another trick or even use store procedure.
This is the second way, the important thing is YOU MUST SORT the content in dtSource and dtDup base on the primary keys respectively. I noticed that there are different behaviors of DataTable if u create it manually and if u use DataSet then populate the records from database. If u use DataTable that u create manually when u delete the row, then the row will be deleted permanently and the index will changed, but if u use DataSet then it will be marked as deleted it will be deleted if u use DataTable.AcceptChanges. If u use DataSet then look at my code with comment "//THIS LINE SHOULD BE REMOVED", statement "i--;" in that line should be removed, but if you use DataTable that u create manually u need that statement, it took a while for me to test and realize there are something odd with the result before, now it fix :) .
- class Program
- {
- static void Main(string[] args)
- {
- //DataTable that contain all records
- DataTable dtSource = new DataTable();
- dtSource.Columns.Add("PK1");
- dtSource.Columns.Add("PK2");
- dtSource.Columns.Add("Column1");
- dtSource.Columns.Add("Column2");
- Random rnd = new Random();
- int j = 0;
- ArrayList arr = new ArrayList();
- for (int i = 0; i < 10 ; i ++ )
- {
- j = rnd.Next(1, 3);
- if (j == 2) arr.Add(i);
- for (int k = 0; k < j ; k ++ )
- {
- DataRow dr = dtSource.NewRow();
- dr["PK1"] = "PK1" + i;
- dr["PK2"] = "PK2" + i;
- dr["Column1"] = "C1" + i;
- dr["Column2"] = "C2" + i;
- dtSource.Rows.Add(dr);
- }
- }
- //DataTable that only contains rows with the duplicate primary keys
- DataTable dtDup = new DataTable();
- dtDup.Columns.Add("PK1");
- dtDup.Columns.Add("PK2");
- for (int i = 0; i < arr.Count ; i ++ )
- {
- DataRow dr = dtDup.NewRow();
- dr["PK1"] = "PK1" + arr[i].ToString();
- dr["PK2"] = "PK2" + arr[i].ToString();
- dtDup.Rows.Add(dr);
- }
- PrintRows(dtSource);
- Console.WriteLine();
- Program obj = new Program();
- string [] dupPrimary = new string[2]{"PK1", "PK2"};
- obj.RemoveDuplicatePrimaryKey( ref dtSource, dtDup, dupPrimary );
- PrintRows(dtSource);
- Console.ReadLine();
- }
- private static void PrintRows(DataTable dt)
- {
- for (int i = 0; i < dt.Rows.Count; i++)
- Console.WriteLine("PK1: {0}, PK2: {1}, Column1: {2}, Column2: {3}",
- dt.Rows[i]["PK1"], dt.Rows[i]["PK2"],
- dt.Rows[i]["Column1"], dt.Rows[i]["Column2"]);
- }
- public void RemoveDuplicatePrimaryKey
- (ref DataTable Source, DataTable Target, string[] PrimaryKeys)
- {
- DataColumn[] keyColumns = new DataColumn[PrimaryKeys.Length];
- for (int i = 0; i < PrimaryKeys.Length; i++)
- {
- keyColumns[i] = Source.Columns[ PrimaryKeys[i] ];
- }
- bool bStart = false, bEnd = false;
- int j = 0;
- bool bDuplicate;
- for (int i = 0; i < Source.Rows.Count; i++)
- {
- DataRow drSource = Source.Rows[i];
- bDuplicate = IsDuplicate(drSource, Target.Rows[j], keyColumns);
- if (!bDuplicate)
- {
- if (bEnd) break;
- else if (bStart)
- {
- bStart = false;
- j++;
- i--;
- }
- else if (j == 0) continue;
- }
- else
- {
- Source.Rows[i].Delete();
- bStart = true;
- if (j == Target.Rows.Count - 1) bEnd = true;
- i--; //THIS LINE SHOULD BE REMOVED
- }
- }
- Source.AcceptChanges();
- }
- private bool IsDuplicate
- (DataRow drSource, DataRow drTarget, DataColumn[] dcPrimaryKeys)
- {
- bool retVal = true;
- foreach (DataColumn column in dcPrimaryKeys)
- {
- retVal = retVal &&
- (drSource[column.ColumnName].ToString() ==
- drTarget[column.ColumnName].ToString());
- if (!retVal) break;
- }
- return retVal;
- }
- }
In the real case, I use query to populate data into dtSource and dtDup, here they are:
- SELECT PK1, PK2, Column1, Column2
- FROM MyTable
- WHERE PK1 is not null and PK2 is not null
- ORDER BY PK1, PK2
- SELECT PK1, PK2
- FROM MyTable
- WHERE PK1 is not null and PK2 is not null
- GROUP BY PK1, PK2
- HAVING COUNT(*) = 1
- ORDER BY PK1, PK2
No comments:
Post a Comment