// Coverage report (Istanbul) for csv2json/src/rowSplit.ts
//   Statements: 95.08% (116/122)
//   Branches:   94.55% (52/55)
//   Functions:  91.67% (11/12)
//   Lines:      95.04% (115/121)
import { CSVParseParam } from "./Parameters";
import { Converter } from "./Converter";
import { Fileline } from "./fileline";
import getEol from "./getEol";
import { filterArray } from "./util";
 
// Candidate delimiters tried by RowSplit.getDelimiter when the configured delimiter is "auto".
const defaulDelimiters = [",", "|", "\t", ";", ":"];
/**
 * Splits raw CSV lines into cells, honouring the quote, escape, trim and
 * delimiter settings of the owning {@link Converter}.
 *
 * A line is split on the delimiter first; the resulting chunks are then
 * stitched back together wherever a quoted field contained the delimiter.
 */
export class RowSplit {
  private quote: string;
  private trim: boolean;
  private escape: string;
  /** Compiled regular expressions keyed by the quote/escape pair they were built for. */
  private cachedRegExp: { [key: string]: RegExp } = {};
  /** Set once the "delimiter" event has been emitted, so it is emitted at most once. */
  private delimiterEmitted = false;
  private _needEmitDelimiter?: boolean = undefined;

  /**
   * Whether anything listens for the "delimiter" event. The listener lookup is
   * performed on first access only and the answer is cached afterwards.
   */
  private get needEmitDelimiter() {
    if (this._needEmitDelimiter === undefined) {
      this._needEmitDelimiter = this.conv.listeners("delimiter").length > 0;
    }
    return this._needEmitDelimiter;
  }

  /**
   * @param conv converter whose `parseParam` supplies quote/trim/escape and
   *             whose `parseRuntime` holds the effective delimiter.
   */
  constructor(private conv: Converter) {
    this.quote = conv.parseParam.quote;
    this.trim = conv.parseParam.trim;
    this.escape = conv.parseParam.escape;
  }

  /**
   * Splits one line into cells.
   *
   * Resolves the runtime delimiter first when it is still an array of
   * candidates or the string "auto" (case-insensitive), and emits the
   * "delimiter" event once if there are listeners for it.
   *
   * @param fileline the line to split.
   * @returns the cells plus `closed`, which is false when the line ends inside
   *          an unterminated quoted field. An empty line yields no cells.
   */
  parse(fileline: Fileline): RowSplitResult {
    if (fileline === "") {
      return { cells: [], closed: true };
    }
    const quote = this.quote;
    const trim = this.trim;
    if (this.conv.parseRuntime.delimiter instanceof Array || this.conv.parseRuntime.delimiter.toLowerCase() === "auto") {
      this.conv.parseRuntime.delimiter = this.getDelimiter(fileline);
    }
    if (this.needEmitDelimiter && !this.delimiterEmitted) {
      this.conv.emit("delimiter", this.conv.parseRuntime.delimiter);
      this.delimiterEmitted = true;
    }
    const delimiter = this.conv.parseRuntime.delimiter;
    const rowArr = fileline.split(delimiter);
    if (quote === "off") {
      // Quote handling disabled: the plain split is the final result.
      return { cells: rowArr, closed: true };
    }
    return this.toCSVRow(rowArr, trim, quote, delimiter);
  }

  /**
   * Rebuilds cells from delimiter-split chunks, merging the chunks that belong
   * to one quoted field.
   *
   * @param rowArr    chunks produced by splitting the line on `delimiter`.
   * @param trim      whether surrounding whitespace is stripped from cells.
   * @param quote     the quote character.
   * @param delimiter the delimiter, re-inserted between merged chunks.
   * @returns the finished cells; `closed` is false when a quote is still open
   *          after the last chunk (the pending field is not part of `cells`).
   */
  private toCSVRow(rowArr: string[], trim: boolean, quote: string, delimiter: string): RowSplitResult {
    const row: string[] = [];
    let inquote = false;
    let quoteBuff = "";
    for (let i = 0, rowLen = rowArr.length; i < rowLen; i++) {
      let e = rowArr[i];
      if (!inquote) {
        if (trim) {
          e = e.trimLeft();
        }
        if (this.isQuoteOpen(e)) {
          // Drop the opening quote.
          e = e.substring(1);
          if (this.isQuoteClose(e)) {
            // Field opens and closes within the same chunk.
            e = e.substring(0, e.lastIndexOf(quote));
            row.push(this.escapeQuote(e));
          } else {
            inquote = true;
            quoteBuff += e;
          }
        } else {
          if (trim) {
            e = e.trimRight();
          }
          row.push(e);
        }
      } else if (this.isQuoteClose(e)) {
        inquote = false;
        // isQuoteClose ignores trailing whitespace when trimming, so the
        // closing quote is the last character only after the same trim.
        // Without it the whitespace would be removed and the quote kept.
        const closing = trim ? e.trimRight() : e;
        quoteBuff += delimiter + closing.substring(0, closing.length - 1);
        quoteBuff = this.escapeQuote(quoteBuff);
        if (trim) {
          quoteBuff = quoteBuff.trimRight();
        }
        row.push(quoteBuff);
        quoteBuff = "";
      } else {
        // Still inside the quoted field: restore the delimiter that split it.
        quoteBuff += delimiter + e;
      }
    }

    return { cells: row, closed: !inquote };
  }

  /**
   * Picks the delimiter for `fileline`.
   *
   * With "auto" the built-in candidate list is used, with an array the given
   * candidates; the candidate producing the most split parts wins (the first
   * one on a tie). Any other configured value is returned unchanged.
   */
  private getDelimiter(fileline: Fileline): string {
    let checker: string[];
    if (this.conv.parseParam.delimiter === "auto") {
      checker = defaulDelimiters;
    } else if (this.conv.parseParam.delimiter instanceof Array) {
      checker = this.conv.parseParam.delimiter;
    } else {
      return this.conv.parseParam.delimiter;
    }
    let count = 0;
    let rtn = ",";
    for (const delim of checker) {
      const delimCount = fileline.split(delim).length;
      if (delimCount > count) {
        rtn = delim;
        count = delimCount;
      }
    }
    return rtn;
  }

  /**
   * True when `str` starts a quoted field: it begins with the quote character
   * and that quote is not merely the first half of a doubled quote.
   */
  private isQuoteOpen(str: string): boolean {
    const quote = this.quote;
    const escape = this.escape;
    return str[0] === quote && (
      str[1] !== quote ||
      str[1] === escape && (str[2] === quote || str.length === 2));
  }

  /**
   * True when `str` ends a quoted field, i.e. it ends with an odd-length run of
   * quote/escape characters. Trailing whitespace is ignored when the
   * converter's `trim` option is on.
   */
  private isQuoteClose(str: string): boolean {
    const quote = this.quote;
    const escape = this.escape;
    if (this.conv.parseParam.trim) {
      str = str.trimRight();
    }
    let count = 0;
    let idx = str.length - 1;
    while (str[idx] === quote || str[idx] === escape) {
      idx--;
      count++;
    }
    return count % 2 !== 0;
  }

  /**
   * Collapses every doubled quote in `str` into a single quote.
   * Not called from anywhere inside this class.
   */
  private twoDoubleQuote(str: string): string {
    const twoQuote = this.quote + this.quote;
    let curIndex = -1;
    while ((curIndex = str.indexOf(twoQuote, curIndex)) > -1) {
      // Drop the first quote of the pair, then resume just past the kept one.
      str = str.substring(0, curIndex) + str.substring(++curIndex);
    }
    return str;
  }

  /**
   * Replaces each escape+quote sequence in `segment` with a bare quote.
   * The regular expression is built once per quote/escape pair and cached.
   */
  private escapeQuote(segment: string): string {
    const key = "es|" + this.quote + "|" + this.escape;
    if (this.cachedRegExp[key] === undefined) {
      this.cachedRegExp[key] = new RegExp('\\' + this.escape + '\\' + this.quote, 'g');
    }
    const regExp = this.cachedRegExp[key];
    return segment.replace(regExp, this.quote);
  }

  /**
   * Parses a batch of lines, joining lines that end inside an open quote with
   * the following line.
   *
   * `lines` is consumed (emptied) in the process. A row is emitted when it is
   * closed or when `alwaysSplitAtEOL` is set; emitted rows are reduced to
   * `parseRuntime.selectedColumns` when that is set.
   *
   * @param lines the lines to parse, in order.
   * @returns the emitted rows and, in `partial`, the text of a trailing row
   *          that is still open (with an end-of-line appended), or "".
   */
  parseMultiLines(lines: Fileline[]): MultipleRowResult {
    const csvLines: string[][] = [];
    let left = "";
    while (lines.length) {
      const line = left + lines.shift();
      const row = this.parse(line);
      if (row.closed || this.conv.parseParam.alwaysSplitAtEOL) {
        if (this.conv.parseRuntime.selectedColumns) {
          csvLines.push(filterArray(row.cells, this.conv.parseRuntime.selectedColumns));
        } else {
          csvLines.push(row.cells);
        }
        left = "";
      } else {
        // Row continues on the next line: keep it together with its line break.
        left = line + (getEol(line, this.conv.parseRuntime) || "\n");
      }
    }
    return { rowsCells: csvLines, partial: left };
  }
}
/**
 * Result of RowSplit.parseMultiLines.
 */
export interface MultipleRowResult {
  /** Cells of every emitted row, one inner array per row. */
  rowsCells: string[][];
  /** Text of a trailing row left unfinished by the batch, or "" when there is none. */
  partial: string;
}
/**
 * Result of splitting a single line into cells.
 */
export interface RowSplitResult {
  /** The cells of the row, e.g. ["a","b","c"]. */
  cells: string[];
  /** True when the passed line forms a complete row. */
  closed: boolean;
}