All files dataClean.ts

100% Statements 7/7
75% Branches 3/4
100% Functions 1/1
100% Lines 7/7
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63  
 
 
 
 
 
 
1x
135x
135x
 
135x
 
 
 
 
 
135x
 
 
 
 
 
 
 
14x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  import { ParseRuntime } from "./ParseRuntime";
import stripBom from "strip-bom";
/**
 * Turns an incoming raw chunk into a string that is safe to hand to the parser.
 *
 * Steps, in order:
 * 1. Prepend whatever bytes are stored in `runtime.csvLineBuffer` from the
 *    previous call (see `concatLeftChunk`).
 * 2. Reset `runtime.csvLineBuffer`, then cut a trailing multi-byte UTF-8
 *    sequence off the end of the data and store it back into
 *    `runtime.csvLineBuffer` (see `cleanUtf8Split`), so a character that is
 *    split across two chunks is never decoded in halves.
 * 3. Decode the remaining bytes as UTF-8.
 * 4. Strip a leading byte-order mark, but only while `runtime.started` is
 *    exactly `false`.
 *
 * @param chunk   Raw bytes received for this call.
 * @param runtime Parser state; its `csvLineBuffer` is read and overwritten.
 * @returns The decoded text for this chunk.
 */
export function prepareData(chunk: Buffer, runtime: ParseRuntime): string {
  const workChunk = concatLeftChunk(chunk, runtime);
  // The leftover bytes are now part of workChunk; clear them before
  // cleanUtf8Split possibly stores a new remainder.
  runtime.csvLineBuffer = undefined;
  const cleanCSVString = cleanUtf8Split(workChunk, runtime).toString("utf8");
  return runtime.started === false ? stripBom(cleanCSVString) : cleanCSVString;
}
/**
 * Prepends the bytes left over from the previous chunk to the current one.
 *
 * @param chunk   Bytes that just arrived.
 * @param runtime Parser state; only `csvLineBuffer` is read, nothing is written.
 * @returns A new buffer holding `runtime.csvLineBuffer` followed by `chunk`
 *          when leftover bytes exist, otherwise `chunk` itself (same instance).
 */
function concatLeftChunk(chunk: Buffer, runtime: ParseRuntime): Buffer {
  const pending = runtime.csvLineBuffer;
  return pending && pending.length > 0
    ? Buffer.concat([pending, chunk])
    : chunk;
}
/**
 * Prevents a multi-byte UTF-8 character from being decoded in two halves.
 *
 * If the last byte of `chunk` is not plain ASCII, the trailing multi-byte
 * sequence (its continuation bytes plus the lead byte in front of them) is cut
 * off and stored in `runtime.csvLineBuffer`, and only the bytes before it are
 * returned. The sequence is held back whether or not it is complete; this
 * function does not check the expected length announced by the lead byte.
 *
 * UTF-8 byte layout used by the masks below:
 * - `0xxxxxxx` single-byte (ASCII) character,
 * - `10xxxxxx` continuation byte,
 * - `11xxxxxx` lead byte of a multi-byte character.
 *
 * @param chunk   Bytes to inspect. It is not modified; the results are slices
 *                of it.
 * @param runtime Parser state; `csvLineBuffer` is overwritten only when bytes
 *                are held back.
 * @returns `chunk` itself when it is empty or ends with an ASCII byte,
 *          otherwise the part of `chunk` in front of the held-back bytes.
 */
function cleanUtf8Split(chunk: Buffer, runtime: ParseRuntime): Buffer {
  const lastIdx = chunk.length - 1;
  let idx = lastIdx;
  // High bit set => the last byte belongs to a multi-byte character.
  if (idx >= 0 && (chunk[idx] & 0x80) !== 0) {
    // Walk backwards over continuation bytes (10xxxxxx). The scan is bounded
    // at the start of the buffer so idx can never drop below -1; otherwise
    // the slices below would receive a negative index and wrap around.
    while (idx >= 0 && (chunk[idx] & 0xc0) === 0x80) {
      idx--;
    }
    // idx is now on the lead byte (or -1 if there is none); step in front of it.
    if (idx >= 0) {
      idx--;
    }
  }
  if (idx === lastIdx) {
    return chunk;
  }
  runtime.csvLineBuffer = chunk.slice(idx + 1);
  return chunk.slice(0, idx + 1);
}