package railo.runtime.text.csv;

import java.util.ArrayList;

import railo.runtime.exp.DatabaseException;
import railo.runtime.exp.PageException;
import railo.runtime.type.Array;
import railo.runtime.type.ArrayImpl;
import railo.runtime.type.Query;
import railo.runtime.type.QueryImpl;
import railo.transformer.util.CFMLString;


/**
 * Class to parse a CSV file
 */
public final class CSVParser {

	/**
	 * Parses CSV content into a Query holding one column per header.
	 * @param csv CSV content to parse
	 * @param delimiter character separating the values of a line
	 * @param textQualifier character enclosing quoted values
	 * @param headers column names, or null to derive them from the first line
	 * @param firstrowasheaders when true the first line is a header line: it supplies the column names
	 * if headers is null, otherwise it is read and discarded. When false and headers is null, the columns
	 * are named COLUMN_1..COLUMN_n and the first line is kept as data
	 * @return parsed CSV as Query
	 * @throws DatabaseException
	 * @throws CSVParserException when a line holds more or fewer values than there are columns,
	 * or when a quoted value is not closed
	 * @throws PageException
	 */
	public static Query toQuery(String csv,char delimiter, char textQualifier,String[] headers, boolean firstrowasheaders) throws DatabaseException, CSVParserException, PageException {
		String[] first=null;

		CFMLString cfmlStr = new CFMLString(csv,"UTF-8");

		// no predefined header
		if(headers==null) {
			// read first line
			first=readFirstLine(cfmlStr,delimiter,textQualifier);
			cfmlStr.removeSpace();

			// set first line to header
			if(firstrowasheaders) {
				headers=first;
				first=null;
			}
			// create auto header, the first line stays a data row
			else {
				headers=new String[first.length];
				for(int i=0;i<first.length;i++) {
					headers[i]="COLUMN_"+(i+1);
				}
			}
		}
		// remove first line when header is defined and firstrowasheaders is true
		else if(!cfmlStr.isAfterLast() && firstrowasheaders){
			readFirstLine(cfmlStr,delimiter,textQualifier);
		}

		// create one value array per column
		Array[] arrays=new Array[headers.length];
		for(int i=0;i<arrays.length;i++) {
			arrays[i]=new ArrayImpl();
		}

		// the first line was data (auto header case): it becomes the first row
		if(first!=null) {
			for(int i=0;i<arrays.length;i++) {
				arrays[i].append(first[i]);
			}
		}

		// read body, one line per iteration
		while(!cfmlStr.isAfterLast()) {
			readLine(cfmlStr,delimiter,textQualifier,arrays);
			cfmlStr.removeSpace();
		}

		return new QueryImpl(headers,arrays,"query");
	}

	/**
	 * Reads the first line of the CSV; the number of values found defines the column count.
	 * @param cfmlStr CFML String containing the CSV, positioned at the start of the line
	 * @param delimiter delimiter splitting the values
	 * @param textQualifier text qualifier of the values
	 * @return the values of the first line
	 * @throws CSVParserException when a quoted value is not closed
	 */
	private static String[] readFirstLine(CFMLString cfmlStr, char delimiter, char textQualifier) throws CSVParserException {
		ArrayList list=new ArrayList();

		while(true) {
			list.add(readValue(cfmlStr,delimiter,textQualifier));
			// skip spaces after the value, unless a space is the delimiter itself
			if(delimiter!=' ')cfmlStr.removeSpace();

			// no delimiter follows the value: the line is complete
			if(cfmlStr.isAfterLast() || cfmlStr.getCurrent()!=delimiter) break;
			cfmlStr.next();

			// a trailing delimiter directly before a line break (or the end of the input) ends the line
			// without adding a further value; '\r' is checked as well so that CRLF input behaves
			// exactly like it does in readLine
			if(isEndOfLine(cfmlStr)) break;
		}

		return (String[])list.toArray(new String[list.size()]);
	}

	/**
	 * Reads one line of the CSV and appends each value to the array of its column.
	 * @param cfmlStr CFML String containing the CSV, positioned at the start of the line
	 * @param delimiter delimiter splitting the values
	 * @param textQualifier text qualifier of the values
	 * @param arrays one array per column; value number i of the line is appended to arrays[i]
	 * @throws CSVParserException when the line holds more or fewer values than there are columns,
	 * or when a quoted value is not closed
	 * @throws PageException
	 */
	private static void readLine(CFMLString cfmlStr, char delimiter, char textQualifier, Array[] arrays) throws CSVParserException, PageException {
		int index=0;

		while(true) {
			// more values than columns
			if(index>=arrays.length)throw new CSVParserException("invalid column count ("+index+"), only "+arrays.length+" columns are allowed");

			String value = readValue(cfmlStr,delimiter,textQualifier);
			arrays[index++].append(value);
			// skip spaces after the value, unless a space is the delimiter itself
			if(delimiter!=' ')cfmlStr.removeSpace();

			// no delimiter follows the value: the line is complete
			if(cfmlStr.isAfterLast() || cfmlStr.getCurrent()!=delimiter) break;
			cfmlStr.next();

			// a trailing delimiter directly before a line break (or the end of the input) ends the line
			if(isEndOfLine(cfmlStr)) break;
		}

		// fewer values than columns
		if(arrays.length!=index)
			throw new CSVParserException("invalid column count, at least "+arrays.length+" columns must be defined");
	}

	/**
	 * Checks whether the current position ends a line. The end of the input counts as a line end,
	 * so getCurrent() is never called once the input is exhausted.
	 * @param cfmlStr CFML String containing the CSV
	 * @return true when the input is exhausted or the current character is '\n' or '\r'
	 */
	private static boolean isEndOfLine(CFMLString cfmlStr) {
		if(cfmlStr.isAfterLast()) return true;
		char c=cfmlStr.getCurrent();
		return c=='\n' || c=='\r';
	}

	/**
	 * Reads a single value from a CSV file.
	 * A value starting with the text qualifier is read up to the closing qualifier and returned as is;
	 * a doubled qualifier inside it stands for one literal qualifier character.
	 * Any other value is read up to the next delimiter, '\n' or the end of the input and is returned trimmed.
	 * @param cfmlStr CFML String containing the CSV
	 * @param delimiter delimiter splitting the values
	 * @param textQualifier text qualifier of the value
	 * @return parsed value
	 * @throws CSVParserException when a quoted value is not closed before the input ends
	 */
	private static String readValue(CFMLString cfmlStr,char delimiter,char textQualifier) throws CSVParserException {
		StringBuffer sb=new StringBuffer();
		cfmlStr.removeSpace();

		// quoted value
		if(cfmlStr.forwardIfCurrent(textQualifier)) {
			while(true) {
				if(cfmlStr.isAfterLast())
					throw new CSVParserException("invalid CSV File, missing end Qualifier ["+textQualifier+"]");

				if(cfmlStr.isCurrent(textQualifier)) {
					cfmlStr.next();
					// doubled qualifier: one literal qualifier character
					if(cfmlStr.isCurrent(textQualifier)) {
						sb.append(textQualifier);
					}
					// single qualifier: end of the value, already consumed
					else {
						break;
					}
				}
				else sb.append(cfmlStr.getCurrent());
				cfmlStr.next();
			}
			return sb.toString();
		}

		// unquoted value
		while(!cfmlStr.isAfterLast()) {
			if(cfmlStr.isCurrent(delimiter) || cfmlStr.getCurrent()=='\n') {
				break;
			}
			sb.append(cfmlStr.getCurrent());
			cfmlStr.next();
		}
		return sb.toString().trim();
	}
}