1 : /*
2 : * Parser for debian database files
3 : *
4 : * Copyright (C) 2003--2007 Enrico Zini <enrico@debian.org>
5 : *
6 : * This library is free software; you can redistribute it and/or
7 : * modify it under the terms of the GNU Lesser General Public
8 : * License as published by the Free Software Foundation; either
9 : * version 2.1 of the License, or (at your option) any later version.
10 : *
11 : * This library is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : * Lesser General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public
17 : * License along with this library; if not, write to the Free Software
18 : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 : */
20 :
21 : #include <ept/debtags/maint/debdbparser.h>
22 :
23 : #include <tagcoll/input/base.h>
24 :
25 : #include <map>
26 : #include <ctype.h>
27 :
28 : // using namespace std;
29 : using namespace tagcoll;
30 :
31 : namespace ept {
32 : namespace debtags {
33 :
34 : // Eat spaces and empty lines
35 : // Returns the number of '\n' encountered
36 12089 : int DebDBParser::eatSpacesAndEmptyLines()
37 : {
38 12089 : int res = 0;
39 : int c;
40 31183 : while ((c = in.nextChar()) != input::Input::Eof && (isblank(c) || c == '\n'))
41 7005 : if (c == '\n')
42 : {
43 3314 : isBOL = true;
44 : //line++;
45 3314 : res++;
46 : } else
47 3691 : isBOL = false;
48 :
49 12089 : if (c == input::Input::Eof)
50 1308 : isEOF = true;
51 : else
52 10781 : in.pushChar(c);
53 :
54 12089 : return res;
55 : }
56 :
57 : // Get the ^([A-Za-z0-9]+) field name
58 7094 : std::string DebDBParser::getFieldName()
59 : {
60 7094 : if (! isBOL)
61 0 : throw exception::Parser(in, "field must start at the beginning of the line");
62 :
63 7094 : std::string res;
64 :
65 : int c;
66 63954 : while ((c = in.nextChar()) != input::Input::Eof && (isalnum(c) || c == '-'))
67 49766 : res += c;
68 :
69 7094 : if (c == input::Input::Eof)
70 : {
71 0 : isEOF = true;
72 0 : if (!res.empty())
73 0 : throw exception::Parser(in, "field is truncated at end of file. Last line begins with: \"" + res + "\n");
74 : } else
75 7094 : in.pushChar(c);
76 :
77 0 : return res;
78 : }
79 :
80 : // Eat the \s*: characters that divide the field name and the field
81 : // data
82 7094 : void DebDBParser::eatFieldSep()
83 : {
84 : int c;
85 :
86 7094 : while ((c = in.nextChar()) != input::Input::Eof && isblank(c))
87 : ;
88 :
89 7094 : if (c != ':')
90 : {
91 0 : if (c == input::Input::Eof)
92 : {
93 0 : isEOF = true;
94 0 : throw exception::Parser(in, "field is truncated at end of file");
95 : } else {
96 0 : throw exception::Parser(in, std::string("invalid character `") + (char)c + "' expecting `:'");
97 : }
98 : }
99 7094 : }
100 :
101 : // Get the \s*(.+?)\s*\n of a body line
102 17875 : void DebDBParser::appendFieldBody(std::string& body)
103 : {
104 : int c;
105 :
106 : // Skip leading spaces
107 17875 : while ((c = in.nextChar()) != input::Input::Eof && isblank(c))
108 : ;
109 :
110 : // Get the body part
111 280757 : for ( ; c != input::Input::Eof && c != '\n'; c = in.nextChar())
112 269976 : body += c;
113 :
114 : // Delete trailing spaces
115 10781 : size_t end = body.find_last_not_of(" \t");
116 10781 : if (end != std::string::npos)
117 10781 : body.resize(end + 1);
118 :
119 10781 : if (c == input::Input::Eof)
120 0 : isEOF = true;
121 : else
122 : {
123 : //line++;
124 10781 : isBOL = true;
125 : }
126 10781 : }
127 :
128 :
129 1308 : DebDBParser::DebDBParser(input::Input& input) :
130 1308 : in(input), isBOL(true), isEOF(false)
131 : {
132 : // Go at the start of the next record
133 1308 : eatSpacesAndEmptyLines();
134 1308 : }
135 :
136 :
137 : // Read a record and positions itself at the start of the next one
138 : // Returns false when there are no more records available
139 3261 : bool DebDBParser::nextRecord(Record& rec)
140 : {
141 3261 : if (isEOF)
142 7 : return false;
143 :
144 3254 : rec.clear();
145 :
146 : int n;
147 12880 : do {
148 : // Read the field name
149 7094 : std::string field = getFieldName();
150 7094 : std::string body;
151 :
152 : //fprintf(stderr, "Got field: %.*s\n", field.size(), field.data());
153 :
154 : // Read the colon
155 7094 : eatFieldSep();
156 :
157 : // Read the first line of the field body
158 7094 : appendFieldBody(body);
159 : //fprintf(stderr, "Got body: %.*s\n", body.size(), body.data());
160 :
161 : // Read the continuation lines of field body
162 17875 : while ((n = eatSpacesAndEmptyLines()) == 0 && ! isBOL)
163 : {
164 3687 : body += '\n';
165 :
166 3687 : size_t start_size = body.size();
167 :
168 3687 : appendFieldBody(body);
169 :
170 : // Check for dot-only lines to be changed to empty lines
171 3687 : if (body.size() - start_size == 1 && body[body.size() - 1] == '.')
172 601 : body.resize(body.size() - 1);
173 :
174 : //fprintf(stderr, "Appended body: %.*s\n", body.size(), body.data());
175 : }
176 : //fprintf(stderr, "Trailing newlines: %d\n", n);
177 :
178 :
179 7094 : rec.insert(std::pair<std::string,std::string>(field, body));
180 : } while (!isEOF && !n);
181 :
182 3254 : return true;
183 : }
184 :
185 : }
186 6 : }
187 :
188 : // vim:set ts=4 sw=4:
|