001/**
002 *
003 * Copyright (c) 2014, the Railo Company Ltd. All rights reserved.
004 *
005 * This library is free software; you can redistribute it and/or
006 * modify it under the terms of the GNU Lesser General Public
007 * License as published by the Free Software Foundation; either 
008 * version 2.1 of the License, or (at your option) any later version.
009 * 
010 * This library is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013 * Lesser General Public License for more details.
014 * 
015 * You should have received a copy of the GNU Lesser General Public 
016 * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
017 * 
018 **/
019package lucee.runtime.text.csv;
020
021import java.util.List;
022import java.util.Set;
023import java.util.TreeSet;
024
025import lucee.runtime.exp.PageException;
026import lucee.runtime.type.Array;
027import lucee.runtime.type.ArrayImpl;
028import lucee.runtime.type.Query;
029import lucee.runtime.type.QueryImpl;
030
031
032public class CSVParser {
033
034    public static Query toQuery( String csv, char delimiter, char textQualifier, String[] headers, boolean firstRowIsHeaders ) throws CSVParserException, PageException {
035        List<List<String>> allRows = ( new CSVString( csv, delimiter ).parse() );
036        int numRows = allRows.size();
037        
038        // no records
039        if ( numRows == 0) {
040            if(firstRowIsHeaders || headers==null)
041                throw new CSVParserException( "No data found in CSV string");
042
043            return new QueryImpl( headers, 0, "query" );
044        }
045        
046        List<String> row = allRows.get( 0 );
047        int numCols = row.size();
048        int curRow = 0;
049        
050        // set first line to header
051        if ( firstRowIsHeaders ) {
052            curRow++;
053            if ( headers == null )
054                headers = makeUnique( row.toArray( new String[ numCols ] ) );
055        }
056
057        // create first line for header
058        if( headers == null )  {
059            headers = new String[ numCols ];
060            for ( int i=0; i < numCols; i++ )
061                headers[ i ] = "COLUMN_" + ( i + 1 );
062        }
063
064        Array[] arrays = new Array[ numCols ];  // create column Arrays
065        for( int i=0; i < numCols; i++ )
066            arrays[ i ] = new ArrayImpl();
067
068        while ( curRow < numRows ) {
069            row = allRows.get( curRow++ );
070            if ( row.size() != numCols )
071                throw new CSVParserException( "Invalid CSV line size, expected " + numCols + " columns but found " + row.size() + " instead", row.toString() );
072            for ( int i=0; i < numCols; i++ ) {
073                arrays[ i ].append( row.get( i ) );
074            }
075        }
076        return new QueryImpl( headers, arrays, "query" );
077    }
078
079        private static String[] makeUnique( String[] headers ) {
080
081        int c = 1;
082        Set set = new TreeSet( String.CASE_INSENSITIVE_ORDER );
083        String header, orig;
084
085        for (int i=0; i<headers.length; i++) {
086
087            orig = header = headers[ i ];
088
089            while ( set.contains( header ) )
090                header = orig + "_" + ++c;
091
092            set.add( header );
093
094            if ( header != orig )       // ref comparison for performance
095                headers[ i ] = header;
096        }
097
098        return headers;
099    }
100
101}