How to clean embedded quotes from CSV files with line breaks
Wednesday, March 31, 2010
I built this to use within an SSIS import:
Dim blnLastFullLine As Boolean = False Dim blnThisFullLine As Boolean = False Public Sub Main() oneFile("file.csv") Dts.TaskResult = Dts.Results.Success End Sub Function oneFile(ByVal file As String) As String Dim sReader As New System.IO.StreamReader(file) Dim sWriter As New System.IO.StreamWriter(Replace(file, ".csv", "Clean.csv")) Do sWriter.WriteLine(fixQuotes(sReader.ReadLine())) Loop Until sReader.Peek = -1 sWriter.Close() sReader.Close() End Function Function fixQuotes(ByVal str As String) As String Dim delim As String = """" ' " Dim qiStr As String = Chr(34) & "," & Chr(34) ' "," Dim doubleQuote As String = Chr(34) & Chr(34) ' "" Dim strStart, strEnd As String strStart = "" strEnd = "" If Left(str, delim.Length) = delim And Left(str, qiStr.Length) <> qiStr And blnLastFullLine Then str = Right(str, str.Length - delim.Length) strStart = delim End If If Right(str, 2) = "," & delim Then If Right(str, 3) = qiStr Then blnThisFullLine = False Else blnThisFullLine = True End If Else If (Right(str, 2) = doubleQuote And Right(str, 3) <> "," & doubleQuote) And Right(str, 3) <> delim & doubleQuote Then blnThisFullLine = False Else blnThisFullLine = True End If End If If Right(str, delim.Length) = delim And Right(str, qiStr.Length) <> qiStr And blnThisFullLine Then str = Left(str, str.Length - delim.Length) strEnd = delim blnLastFullLine = True Else blnLastFullLine = False End If If str.Length > 2 Then str = Replace(str, qiStr, "|&|") str = Replace(str, Chr(34), """) fixQuotes = strStart & Replace(str, "|&|", qiStr) & strEnd End If End Function
Comment on this article