CreateLayoutFromCSV PROCEDURE (String pFileName) ! Declare Procedure !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= ! ! written on 11th June 2021 by Geoff Robinson vitesse AT gmail DOT com ! (c) 2021 by Geoffrey C. Robinson - Released under the MIT License ! ! see: https://clarionhub.com/t/create-a-file-layout-from-a-csv-file/4232 ! ! 14th June 2021 - GCR incorporated type detection for fields as suggested by Carl Barnes ! - added underscore separated as well as existing Capitalized field names ! - now outputs six different formats ! !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= str StringTheory lne StringTheory fld StringTheory errs StringTheory out StringTheory tmp StringTheory header StringTheory ! format header trailer StringTheory ! format trailer x Long,auto y Long,auto lines Long,auto cols Long,auto decLen Long,auto attribs group,type ! column attibutes group max long ! maximum characters notNumeric BYTE ! 1=Column w/o a Number found so String digitsLeft BYTE ! Max Digits Left of Decimal Point digitsRight BYTE ! Max Digits Right of Decimal Point i.e. pennies isNegative BYTE ! Less than Zero seen end Q queue Row1 group(attribs). ! attributes of first row Rest group(attribs). ! attributes of the rest of the rows end attribsPtr &attribs dot LONG ! Decimal Point numLeft BYTE ! Digits Left of Decimal Point numRight BYTE ! Digits Right of Decimal Point allString BYTE capitalize BYTE CODE if not str.LoadFile(pFilename) fld.setValue('Error: Failed to load ' & clip(pFileName) & ' Error: ' & str.lastError) do ReportError return end str.Split('<13,10>','"') str.removeLines() ! get rid of any empty lines lines = str.records() if not lines fld.setValue('Error: No CSV lines loaded for ' & clip(pFileName)) do ReportError return end Lne.SetValue(Str.GetLine(1)) ! get first line (possibly header) Lne.Split(',','"','"',true) cols = lne.records() attribsPtr &= Q.row1 loop y = 1 to cols ! set row1 width of each column clear(Q) fld.setValue(lne.GetLine(y)) fld.replace('""','"') Q.row1.max = fld.length() if ~Q.row1.notNumeric then do CheckNumber. add(Q) end attribsPtr &= Q.rest ! attributes for subsequent rows (after the first one) loop x = 2 to lines ! check each row to get max width for each column Lne.SetValue(Str.GetLine(x)) Lne.Split(',','"','"',true) if cols <> lne.records() fld.setValue('Inconsistent number of columns on line ' & x & | ': Expected ' & cols & ' but found ' & lne.records()) do reportError end loop y = 1 to lne.records() ! loop through columns fld.setValue(lne.GetLine(y)) fld.replace('""','"') get(Q,y) if errorcode() clear(Q) Q.rest.max = fld.length() add(Q,y) fld.setValue('Adding column ' & y & ' to lengths attribute queue on line ' & x) do reportError elsif fld._DataEnd > Q.rest.max Q.rest.max = fld._DataEnd ! new maximum width for this column end if ~Q.rest.notNumeric then do CheckNumber. put(Q) end !loop cols end !loop lines ! set up header header.setValue(fld.CleanFileName(fld.FileNameOnly(pFileName,false))) header.replace(' ','') ! remove any spaces: same as out.remove(' ') if header.length() < 18 then header.setLength(18). header.append(' FILE,DRIVER(''BASIC''),NAME(''' & clip(pFileName) & '''),PRE(' & clip(upper(header.slice(1,3))) & ')<13,10>') header.append('record RECORD<13,10>') ! set up trailer trailer.setValue(' END<13,10>' & | ' END<13,10,13,10>' & | '!----------------------------------------------<13,10>') out.setValue('!----------------------------------------------<13,10>') ! output format six times.... out.append('! 1) assumming first row is header with field names capitalized, detect/guess field types<13,10,13,10>') capitalize = true allString = false do OutputAssumingRow1Header out.append('! 2) assumming first row is header with field names underscore separated, detect/guess field types<13,10,13,10>') capitalize = false allString = false do OutputAssumingRow1Header out.append('! 3) assumming first row is header with field names capitalized, all fields string types<13,10,13,10>') capitalize = true allString = true do OutputAssumingRow1Header out.append('! 4) assumming first row is header with field names underscore separated, all fields string types<13,10,13,10>') capitalize = false allString = true do OutputAssumingRow1Header out.append('! 5) assumming no header, detect/guess field types<13,10,13,10>') allString = false do OutputAssumingNoHeaderRow out.append('! 6) assumming no header, all fields string types<13,10,13,10>') allString = true do OutputAssumingNoHeaderRow if errs.length() ! do we have any errors logged? out.append('<13,10>Errors:<13,10,13,10>') out.append(errs) out.append('<13,10>') end fld.setValue(pFilename) fld.setValue(fld.pathOnly() & '\' & fld.fileNameOnly(,false) & | ' layout on ' & clip(left(format(today(),@d12))) & | ' at ' & clip(left(format(clock(),@T05))) & format(clock()%100,@N02) & '.txt') out.saveFile(fld.getvalue()) !stop('saved layout for ' & clip(pFileName) & ' to ' & fld.getValue()) !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= OutputAssumingRow1Header ROUTINE out.append(header) attribsPtr &= Q.rest ! ignore size settings of first line and just use the rest Lne.SetValue(Str.GetLine(1)) ! get first line (assume it is header) Lne.Split(',','"','"',true) cols = lne.records() loop y = 1 to cols fld.setValue(lne.GetLine(y)) if capitalize do CapitalizeFieldName else do UnderscoreSeparateFieldName end get(Q,y) do AddFieldLine end out.append(trailer) !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= OutputAssumingNoHeaderRow ROUTINE attribsPtr &= Q.rest out.append(header) loop y = 1 to cols get(Q,y) if Q.row1.max > Q.rest.max then Q.rest.max = Q.row1.max. if not allString if Q.row1.notNumeric then Q.rest.notNumeric = true. if ~Q.rest.notNumeric if Q.row1.digitsLeft > Q.rest.digitsLeft then Q.rest.digitsLeft = Q.row1.digitsLeft. if Q.row1.digitsRight > Q.rest.digitsRight then Q.rest.digitsRight = Q.row1.digitsRight. if Q.row1.isNegative then Q.rest.isNegative = true. end end fld.setValue('Label' & y) do AddFieldLine end out.append(trailer) !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= AddFieldLine ROUTINE if fld.length() < 22 then fld.setLength(22). if attribsPtr.max = 0 fld.append(' STRING(1) ! note: no data found<13,10>') elsif attribsPtr.notNumeric or allString fld.append(' STRING(' & attribsPtr.max & ')<13,10>') elsif attribsPtr.digitsRight or attribsPtr.DigitsLeft > 9 decLen = attribsPtr.digitsLeft + attribsPtr.DigitsRight if decLen%2 = 0 then decLen += 1. ! move up to odd length for decimals fld.append(' DECIMAL(' & decLen & ',' & attribsPtr.DigitsRight & ')<13,10>') ! elsif Q.DigitsLeft[grpIdx] <= 2 ! Carl notes BASIC import does not do BYTE ! fld.append(' BYTE<13,10>') elsif attribsPtr.DigitsLeft <= 4 ! USHORT 65,535 if attribsPtr.isNegative fld.append(' SHORT<13,10>') else fld.append(' USHORT<13,10>') end else if attribsPtr.isNegative fld.append(' LONG<13,10>') else fld.append(' ULONG<13,10>') end end out.append(fld) !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= ReportError ROUTINE ! note: error is in fld errs.append(fld.getValue() & '<13,10>') fld.trace() stop(fld.getValue() & '<13,10>(Error creating layout from ' & clip(pFileName) & ')') !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= CheckNumber ROUTINE ! note field value is in fld fld.trim() ! remove spaces from both ends tmp.setValue(fld) ! make a temporary copy tmp.trim('-') ! remove '-' from both ends tmp.removeChars(',.') ! 1,234.56 -> 123456 if tmp._DataEnd > 31 or | too long for number tmp.KeepChars('0123456789') or | contains non-numeric chars fld.count('-') > 1 or | fld.count('.') > 1 attribsPtr.notNumeric = true ! invalid numeric so store as string else if fld.containsChar('-') attribsPtr.IsNegative = true fld.trim('-') end fld.replace(',','') !fld.removeChars(',') dot = fld.findChar('.') if dot numLeft = dot - 1 numRight = fld._DataEnd - Dot else numLeft = fld._DataEnd numRight = 0 end if numLeft > attribsPtr.digitsLeft then attribsPtr.digitsLeft = numLeft. if numRight > attribsPtr.digitsRight then attribsPtr.digitsRight = numRight. end !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= CapitalizeFieldName ROUTINE ! note field name is in fld data containsLower Long,auto containsUpper Long,auto i Long,auto code fld.KeepChars('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_: ',' ') fld.Split(' ') loop i = 1 to fld.records() fld.setValue(fld.getLine(i)) if fld.length() < 1 then cycle. containsLower = fld.containsA('abcdefghijklmnopqrstuvwxyz',fld.slice(2)) ! exclude 1st char which will be upper'ed containsUpper = fld.containsA('ABCDEFGHIJKLMNOPQRSTUVWXYZ') if containsUpper if containsLower if ~inrange(val(fld.valuePtr[1]),val('a'),val('z')) ! first char not lower? cycle ! do NOT change this word in field name label - take it as the mixed case given end else fld.lower() end end fld.valuePtr[1] = upper(fld.valuePtr[1]) fld.setLine(i,fld.getValue()) ! update value in queue of words end fld.join('') ! join the words back together to give the field name label if fld._DataEnd < 1 fld.setValue('_') ! blank field name else case val(fld.valuePtr[1]) ! first char should be alpha or underscore of val('A') to val('Z') orof val('_') ! all ok else fld.prepend('_') end end !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= UnderscoreSeparateFieldName ROUTINE ! note field name is in fld fld.KeepChars('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_:','_') fld.trim('_') loop if not fld.replace('__','_') then break. ! easier/faster than recursive replace option end if fld._DataEnd < 1 fld.setValue('_') ! blank field name else case val(fld.valuePtr[1]) ! first char should be alpha or underscore of val('A') to val('Z') orof val('a') to val('z') orof val('_') ! all ok else fld.prepend('_') end end !-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=