// WME/aufgabe5/node_modules/csvtojson/src/rowSplit.ts
//
// 229 lines
// 6.8 KiB
// TypeScript

import { CSVParseParam } from "./Parameters";
import { Converter } from "./Converter";
import { Fileline } from "./fileline";
import getEol from "./getEol";
import { filterArray, trimLeft, trimRight } from "./util";
/** Candidate separators considered when the delimiter has to be auto-detected. */
const defaulDelimiters: string[] = [
  ",",
  "|",
  "\t",
  ";",
  ":",
];
/**
 * Splits CSV text lines into cells using the quote, escape, trim and delimiter
 * settings of the owning converter.
 */
export class RowSplit {
  private quote: string;
  private trim: boolean;
  private escape: string;
  /** Patterns built by `escapeQuote`, keyed by the quote/escape pair they match. */
  private cachedRegExp: { [key: string]: RegExp } = {};
  /** Set once the "delimiter" event has been emitted, so it fires at most once. */
  private delimiterEmitted = false;
  private _needEmitDelimiter?: boolean = undefined;

  /**
   * Whether the converter has at least one "delimiter" listener.
   * Looked up on first access and cached afterwards.
   */
  private get needEmitDelimiter() {
    if (this._needEmitDelimiter === undefined) {
      this._needEmitDelimiter = this.conv.listeners("delimiter").length > 0;
    }
    return this._needEmitDelimiter;
  }

  /**
   * @param conv converter whose `parseParam` supplies quote/trim/escape and whose
   *             `parseRuntime.delimiter` is read (and possibly resolved) while parsing
   */
  constructor(private conv: Converter) {
    this.quote = conv.parseParam.quote;
    this.trim = conv.parseParam.trim;
    this.escape = conv.parseParam.escape;
  }

  /**
   * Splits a single line into cells.
   *
   * Side effects: when the runtime delimiter is an array or "auto" it is replaced by
   * the delimiter detected from this line, and the "delimiter" event is emitted once
   * if the converter has listeners for it.
   *
   * @param fileline line to split
   * @returns the cells found and whether the row is complete (`closed` is false when
   *          a quoted cell is still open at the end of the line)
   */
  parse(fileline: Fileline): RowSplitResult {
    if (fileline.length === 0 || (this.conv.parseParam.ignoreEmpty && fileline.trim().length === 0)) {
      return { cells: [], closed: true };
    }
    const quote = this.quote;
    const trim = this.trim;
    if (this.conv.parseRuntime.delimiter instanceof Array || this.conv.parseRuntime.delimiter.toLowerCase() === "auto") {
      this.conv.parseRuntime.delimiter = this.getDelimiter(fileline);
    }
    if (this.needEmitDelimiter && !this.delimiterEmitted) {
      this.conv.emit("delimiter", this.conv.parseRuntime.delimiter);
      this.delimiterEmitted = true;
    }
    const delimiter = this.conv.parseRuntime.delimiter;
    const rowArr = fileline.split(delimiter);
    if (quote === "off") {
      // Quoting disabled: every delimiter separates cells.
      return { cells: trim ? rowArr.map((cell) => cell.trim()) : rowArr, closed: true };
    }
    return this.toCSVRow(rowArr, trim, quote, delimiter);
  }

  /**
   * Re-joins delimiter-split segments that belong to the same quoted cell and
   * strips/unescapes the quotes.
   *
   * @param rowArr    the line split on `delimiter`
   * @param trim      whether to trim whitespace around cells
   * @param quote     quote character
   * @param delimiter delimiter used for the split; re-inserted inside quoted cells
   * @returns the finished cells; `closed` is false when a quoted cell is still open
   */
  private toCSVRow(rowArr: string[], trim: boolean, quote: string, delimiter: string): RowSplitResult {
    const row: string[] = [];
    let inquote = false;
    let quoteBuff = "";
    for (const segment of rowArr) {
      if (inquote) {
        // A quoted cell is open: this segment is a continuation of it.
        if (this.isQuoteClose(segment)) {
          // isQuoteClose() ignores trailing whitespace when trimming is enabled, so
          // strip it the same way before dropping the closing quote. Cutting the last
          // character of the untrimmed segment would remove a space and keep the quote.
          const closing = this.conv.parseParam.trim ? trimRight(segment) : segment;
          quoteBuff += delimiter + closing.slice(0, -1);
          quoteBuff = this.escapeQuote(quoteBuff);
          if (trim) {
            quoteBuff = trimRight(quoteBuff);
          }
          row.push(quoteBuff);
          quoteBuff = "";
          inquote = false;
        } else {
          quoteBuff += delimiter + segment;
        }
        continue;
      }

      let e = trim ? trimLeft(segment) : segment;
      if (e.length === 2 && e === this.quote + this.quote) {
        // Two quotes with nothing between them: an empty quoted cell.
        row.push("");
        continue;
      }
      if (!this.isQuoteOpen(e)) {
        // Plain unquoted cell.
        row.push(trim ? trimRight(e) : e);
        continue;
      }

      e = e.slice(1); // drop the opening quote
      if (this.isQuoteClose(e)) {
        // Opened and closed within the same segment.
        e = e.substring(0, e.lastIndexOf(quote));
        row.push(this.escapeQuote(e));
        continue;
      }
      if (e.indexOf(quote) !== -1) {
        let count = 0;
        for (const c of e) {
          if (c === quote) {
            count++;
          }
        }
        if (count % 2 === 1) {
          // Odd number of further quotes without a closing quote at the end:
          // keep the segment as a plain cell with its leading quote restored.
          row.push(quote + (trim ? trimRight(e) : e));
          continue;
        }
      }
      // The quoted cell continues in the following segment(s).
      inquote = true;
      quoteBuff += e;
    }
    return { cells: row, closed: !inquote };
  }

  /**
   * Resolves the delimiter for a line. With "auto" (any letter case) the default
   * candidates are tried, with an array the listed candidates are tried; the candidate
   * that splits the line into the most pieces wins, earlier candidates winning ties.
   * Any other configured value is returned unchanged.
   *
   * @param fileline line used for detection
   * @returns the delimiter to use ("," when no candidate is available)
   */
  private getDelimiter(fileline: Fileline): string {
    const configured = this.conv.parseParam.delimiter;
    let candidates: string[];
    if (configured instanceof Array) {
      candidates = configured;
    } else if (typeof configured === "string" && configured.toLowerCase() === "auto") {
      // Case-insensitive, matching the check in parse(); otherwise "AUTO" would be
      // returned here and used as a literal delimiter.
      candidates = defaulDelimiters;
    } else {
      return configured;
    }
    let best = ",";
    let bestCount = 0;
    for (const delim of candidates) {
      const pieces = fileline.split(delim).length;
      if (pieces > bestCount) {
        best = delim;
        bestCount = pieces;
      }
    }
    return best;
  }

  /**
   * Whether `str` starts a quoted cell: it begins with the quote character and the
   * second character is either not a quote, or is the escape character followed by a
   * quote (or ending a two-character string).
   */
  private isQuoteOpen(str: string): boolean {
    const quote = this.quote;
    const escape = this.escape;
    if (str[0] !== quote) {
      return false;
    }
    return str[1] !== quote || (str[1] === escape && (str[2] === quote || str.length === 2));
  }

  /**
   * Whether `str` ends a quoted cell: the trailing run of quote/escape characters
   * (after right-trimming when `parseParam.trim` is set) has odd length.
   */
  private isQuoteClose(str: string): boolean {
    const quote = this.quote;
    const escape = this.escape;
    const text = this.conv.parseParam.trim ? trimRight(str) : str;
    let count = 0;
    // Indexing below 0 yields undefined, which matches neither character and ends the loop.
    for (let idx = text.length - 1; text[idx] === quote || text[idx] === escape; idx--) {
      count++;
    }
    return count % 2 !== 0;
  }

  /**
   * Replaces every escape+quote sequence in `segment` with a single quote character.
   * The pattern is compiled once per quote/escape pair and cached.
   */
  private escapeQuote(segment: string): string {
    const key = "es|" + this.quote + "|" + this.escape;
    let regExp = this.cachedRegExp[key];
    if (regExp === undefined) {
      regExp = new RegExp("\\" + this.escape + "\\" + this.quote, "g");
      this.cachedRegExp[key] = regExp;
    }
    return segment.replace(regExp, this.quote);
  }

  /**
   * Splits several lines into rows, joining lines that belong to one multi-line
   * quoted cell. `lines` is consumed (emptied) in the process.
   *
   * @param lines lines to parse, in order
   * @returns the completed rows (restricted to `parseRuntime.selectedColumns` when set)
   *          and, in `partial`, the text of a trailing row that is still open
   */
  parseMultiLines(lines: Fileline[]): MultipleRowResult {
    const csvLines: string[][] = [];
    let left = "";
    while (lines.length) {
      const line = left + lines.shift();
      const row = this.parse(line);
      const emitRow = row.closed || this.conv.parseParam.alwaysSplitAtEOL;
      if (!emitRow) {
        // Row still open: carry the text over so the next line continues it. This must
        // also happen when no cell is finished yet (the first cell opens the quote);
        // skipping such a line as "empty" would lose the start of the quoted value.
        left = line + (getEol(line, this.conv.parseRuntime) || "\n");
        continue;
      }
      if (row.cells.length === 0 && this.conv.parseParam.ignoreEmpty) {
        continue;
      }
      const selected = this.conv.parseRuntime.selectedColumns;
      csvLines.push(selected ? filterArray(row.cells, selected) : row.cells);
      left = "";
    }
    return { rowsCells: csvLines, partial: left };
  }
}
/**
 * Result of splitting a batch of lines into rows.
 */
export interface MultipleRowResult {
  /**
   * Cell arrays of the rows that were completed, one inner array per row.
   */
  rowsCells: Array<string[]>;
  /**
   * Text of a trailing row that was still open when the lines ran out;
   * empty string when every row was completed.
   */
  partial: string;
}
/**
 * Result of splitting a single line into cells.
 */
export interface RowSplitResult {
  /**
   * Cells of the CSV row, e.g. ["a","b","c"].
   */
  cells: string[];
  /**
   * Whether the passed file line forms a complete row.
   */
  closed: boolean;
}