1use std::fs::read;
2use std::iter::Peekable;
3use std::path::Path;
4use std::str::Chars;
5
6use anyhow::{Result, bail};
7use encoding_rs::WINDOWS_1252;
8
9use crate::fileset::FileEntry;
10use crate::report::ErrorLoc;
11use crate::token::{Loc, Token};
12
13#[derive(Clone, Debug)]
14struct CsvParser<'a> {
15 loc: Loc,
16 offset: usize,
17 content: &'a str,
18 header_lines: usize,
19 chars: Peekable<Chars<'a>>,
20}
21
22impl<'a> CsvParser<'a> {
23 fn new(mut loc: Loc, header_lines: usize, content: &'a str) -> Self {
24 loc.line = 1;
25 loc.column = 1;
26 let chars = content.chars().peekable();
27 Self { loc, offset: 0, content, header_lines, chars }
28 }
29
30 fn next_char(&mut self) {
31 if let Some(c) = self.chars.next() {
33 self.offset += c.len_utf8();
34 if c == '\n' {
35 self.loc.line += 1;
36 self.loc.column = 1;
37 } else {
38 self.loc.column += 1;
39 }
40 }
41 }
42
43 fn skip_whitespace(&mut self) {
44 while let Some(c) = self.chars.peek() {
45 if c.is_ascii_whitespace() {
46 self.next_char();
47 } else {
48 break;
49 }
50 }
51 }
52
53 fn skip_line(&mut self) {
54 while let Some(&c) = self.chars.peek() {
55 if c == '\n' {
56 break;
57 }
58 self.next_char();
59 }
60 self.next_char(); }
62
63 fn parse_csv(&mut self) -> Option<Vec<Token>> {
65 loop {
67 self.skip_whitespace();
68 if self.chars.peek() == Some(&'#') {
69 self.skip_line();
70 } else if self.header_lines > 0 {
71 self.skip_line();
72 self.header_lines -= 1;
73 } else {
74 break;
75 }
76 }
77 self.chars.peek()?;
78
79 let mut vec = Vec::new();
80 let mut loc = self.loc;
81 let mut start_offset = self.offset;
82
83 while let Some(c) = self.chars.peek() {
84 match c {
85 '#' | '\r' | '\n' | ';' => {
86 let s = &self.content[start_offset..self.offset];
87 vec.push(Token::new(s, loc));
88 if c == &';' {
89 self.next_char();
90 loc = self.loc;
91 start_offset = self.offset;
92 } else {
93 break;
94 }
95 }
96 _ => self.next_char(),
97 }
98 }
99
100 self.skip_line();
101 Some(vec)
102 }
103}
104
105pub struct CsvReader<'a> {
106 parser: CsvParser<'a>,
107}
108
109impl Iterator for CsvReader<'_> {
110 type Item = Vec<Token>;
111
112 fn next(&mut self) -> Option<Self::Item> {
113 self.parser.parse_csv()
114 }
115}
116
117pub fn read_csv(fullpath: &Path) -> Result<String> {
118 let bytes = read(fullpath)?;
119 let (content, errors) = WINDOWS_1252.decode_without_bom_handling(&bytes);
120 if errors {
121 bail!("invalid characters");
122 }
123 Ok(content.into_owned())
124}
125
126#[allow(clippy::module_name_repetitions)]
127pub fn parse_csv<'a>(entry: &FileEntry, header_lines: usize, content: &'a str) -> CsvReader<'a> {
128 let parser = CsvParser::new(entry.into_loc(), header_lines, content);
129 CsvReader { parser }
130}