001    
002    package railo.runtime.text.csv;
003    
004    import java.util.ArrayList;
005    
006    import railo.runtime.exp.DatabaseException;
007    import railo.runtime.exp.PageException;
008    import railo.runtime.type.Array;
009    import railo.runtime.type.ArrayImpl;
010    import railo.runtime.type.Query;
011    import railo.runtime.type.QueryImpl;
012    import railo.transformer.util.CFMLString;
013    
014    
015    /**
016     * Class to parse a CVS File 
017     */
018    public final class CSVParser {
019            
020            /**
021             * parse a CVS File
022             * @param csv
023             * @param delimiter
024             * @param textQualifier
025             * @param headers
026             * @param firstrowasheaders
027             * @return parse CVS as Query
028             * @throws DatabaseException
029             * @throws CSVParserException
030             * @throws PageException
031             */
032             public static Query toQuery(String csv,char delimiter, char textQualifier,String[] headers, boolean firstrowasheaders) throws DatabaseException, CSVParserException, PageException {
033                    //print.ln("delimiter:"+delimiter);
034                    //print.ln("textQualifier:"+textQualifier);
035                    String[] first=null;
036                    
037                    CFMLString cfmlStr = new CFMLString(csv,"UTF-8");
038                    //print.ln(cfmlStr.toString());
039            // no predefined Header
040                    if(headers==null) {
041                            // read first line
042                            first=readFirstLine(cfmlStr,delimiter,textQualifier);
043                            cfmlStr.removeSpace();
044                            
045                            // set first line to header
046                            if(firstrowasheaders) {
047                                    headers=first;
048                                    first=null;
049                            }
050                            // create auto header
051                            else {
052                                    headers=new String[first.length];
053                                    for(int i=0;i<first.length;i++) {
054                                            headers[i]="COLUMN_"+(i+1);
055                                    }
056                            }
057                    }
058                    // remove first line when header is defined and firstrowasheaders is true
059                    else if(!cfmlStr.isAfterLast() && firstrowasheaders){
060                            readFirstLine(cfmlStr,delimiter,textQualifier);
061                    }
062                    
063                    // create column Array
064                    Array[] arrays=new Array[headers.length];
065                    for(int i=0;i<arrays.length;i++) {
066                            arrays[i]=new ArrayImpl();
067                    }
068                    // fill first row to data array, when not empty
069                    if(first!=null) {
070                            for(int i=0;i<arrays.length;i++) {
071                                    arrays[i].append(first[i]);
072                            }       
073                    }
074                    
075                                    
076                    // read Body
077            //int count=0;
078                    while(!cfmlStr.isAfterLast()) {
079                readLine(cfmlStr,delimiter,textQualifier,arrays);
080                            cfmlStr.removeSpace();
081                    }
082                    
083                    cfmlStr.removeSpace();
084                    
085                    return new QueryImpl(headers,arrays,"query");
086                    
087            }
088            
089            
090            
091            
092            /**
093             * @param cfmlStr
094             * @param delimiter
095             * @param textQualifier
096             * @return read the first Line of the CVS
097             * @throws CSVParserException
098             */
099            private static String[] readFirstLine(CFMLString cfmlStr, char delimiter, char textQualifier) throws CSVParserException {
100                    ArrayList list=new ArrayList();
101                    
102                    do {
103                            String value=readValue(cfmlStr,delimiter,textQualifier);
104                            list.add(value);
105                            if(delimiter!=' ')cfmlStr.removeSpace();
106                            if(!cfmlStr.isAfterLast() && cfmlStr.getCurrent()==delimiter) {
107                    cfmlStr.next();
108                    if(cfmlStr.getCurrent()=='\n')break;
109                }
110                            else break;
111                    }
112                    while(!cfmlStr.isAfterLast());
113                    
114                    return (String[])list.toArray(new String[list.size()]);
115            }       
116    
117            /**
118             * read a Line of the CVS File
119             * @param cfmlStr
120             * @param delimiter
121             * @param textQualifier
122             * @param arrays
123             * @throws CSVParserException
124             * @throws PageException
125             */
126            private static void readLine(CFMLString cfmlStr, char delimiter, char textQualifier, Array[] arrays) throws CSVParserException, PageException {
127            //print.ln("-----------------");
128            //String[] arr=new String[len];
129                    int index=0;
130                    do {
131                            if(index>=arrays.length)throw new CSVParserException("invalid column count ("+index+"), only "+arrays.length+" columns are allowed");
132                
133                String value = readValue(cfmlStr,delimiter,textQualifier);
134                //print.ln("["+value+"]"+delimiter+":"+textQualifier);
135                arrays[index++].append(value);
136                            //arr[index++]=readValue(cfmlStr,delimiter,textQualifier);
137                            if(delimiter!=' ')cfmlStr.removeSpace();
138                            if(!cfmlStr.isAfterLast() && cfmlStr.getCurrent()==delimiter) {
139                    cfmlStr.next();
140                    if(cfmlStr.getCurrent()=='\n' || cfmlStr.getCurrent()=='\r')break;
141                    //if(cfmlStr.getCurrent()=='\n')break;
142                }
143                            else break;
144                            
145                    }
146                    while(!cfmlStr.isAfterLast());
147                    
148                    if(arrays.length!=index)
149                            throw new CSVParserException("invalid column count, at least "+arrays.length+" columns must be defined");
150            }
151    
152    
153    
154    
155            /**
156             * Reads a Single value from a CSV File
157             * @param cfmlStr CFML String containig csv
158             * @param delimiter delimiter splits values
159             * @param textQualifier text qualifier of the value
160             * @return parsed value
161             * @throws CSVParserException
162             */
163            private static String readValue(CFMLString cfmlStr,char delimiter,char textQualifier) throws CSVParserException {
164                    StringBuffer sb=new StringBuffer();
165                    cfmlStr.removeSpace();
166                    // Quoted String
167                    if(cfmlStr.forwardIfCurrent(textQualifier)) {
168                            while(true) {
169                                    if(cfmlStr.isAfterLast())
170                                            throw new CSVParserException("invalid CSV File, missing end Qualifier ["+textQualifier+"]");
171                                    
172                                    if(cfmlStr.isCurrent(textQualifier)) {
173                                            cfmlStr.next();
174                                            if(cfmlStr.isCurrent(textQualifier)) {
175                                                    sb.append(textQualifier);
176                                            }
177                                            else {
178                                                    break;
179                                            }
180                                    }
181                                    else sb.append(cfmlStr.getCurrent());
182                                    cfmlStr.next();
183                            }
184                            return sb.toString();
185                    }
186                    // Pure String  
187                    while(!cfmlStr.isAfterLast()) {
188                            if(cfmlStr.isCurrent(delimiter) || cfmlStr.getCurrent()=='\n') {
189                                    break;
190                            }
191                            sb.append(cfmlStr.getCurrent());
192                            cfmlStr.next();
193                    }
194                    return sb.toString().trim();
195    
196            }
197    }