How to clean embedded quotes from CSV files with line breaks

Wednesday, March 31, 2010

I built this to use within an SSIS import:

    Dim blnLastFullLine As Boolean = False
    Dim blnThisFullLine As Boolean = False

    Public Sub Main()
        oneFile("file.csv")
        Dts.TaskResult = Dts.Results.Success
    End Sub

    Function oneFile(ByVal file As String) As String
        Dim sReader As New System.IO.StreamReader(file)
        Dim sWriter As New System.IO.StreamWriter(Replace(file, ".csv", "Clean.csv"))
        Do
            sWriter.WriteLine(fixQuotes(sReader.ReadLine()))
        Loop Until sReader.Peek = -1
        sWriter.Close()
        sReader.Close()
    End Function

    Function fixQuotes(ByVal str As String) As String
        Dim delim As String = """"                      ' "
        Dim qiStr As String = Chr(34) & "," & Chr(34)   ' ","
        Dim doubleQuote As String = Chr(34) & Chr(34)   ' ""
        Dim strStart, strEnd As String
        strStart = ""
        strEnd = ""
        If Left(str, delim.Length) = delim And Left(str, qiStr.Length) <> qiStr And blnLastFullLine Then
            str = Right(str, str.Length - delim.Length)
            strStart = delim
        End If

        If Right(str, 2) = "," & delim Then
            If Right(str, 3) = qiStr Then
                blnThisFullLine = False
            Else
                blnThisFullLine = True
            End If
        Else
            If (Right(str, 2) = doubleQuote And Right(str, 3) <> "," & doubleQuote) And Right(str, 3) <> delim & doubleQuote Then
                blnThisFullLine = False
            Else
                blnThisFullLine = True
            End If
        End If


        If Right(str, delim.Length) = delim And Right(str, qiStr.Length) <> qiStr And blnThisFullLine Then
            str = Left(str, str.Length - delim.Length)
            strEnd = delim
            blnLastFullLine = True
        Else
            blnLastFullLine = False
        End If

        If str.Length > 2 Then
            str = Replace(str, qiStr, "|&|")
            str = Replace(str, Chr(34), """)
            fixQuotes = strStart & Replace(str, "|&|", qiStr) & strEnd
        End If
    End Function

0 Comments

Comment on this article

Name:
Email:
Web:
Turing: Just to be sure you're a person, please enter our family last name.
Comments without an email address, or with any kind of HTML tags will not be accepted.