# 357 "./lpsrc/flx_tokeniser.ipk"
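(* Debug printers for Felix token streams, plus a [tokeniser] class that
   feeds a pre-lexed token list to the ocamlyacc-generated parsers in
   Flx_parse, with support for token put-back and backtracking. *)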
open Flx_ast
open Flx_exceptions
open List
open Flx_srcref
open Flx_parse

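(* Print a single pre-token: comments and newlines get their own output
   line, the end marker prints as <<EOF>>, and everything else goes
   through Flx_prelex.string_of_token. *)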
let print_pre_token t =
  let emit t = print_string (Flx_prelex.string_of_token t) in
  begin match t with
  | COMMENT_NEWLINE s ->
    print_endline ("//" ^ s);

  | NEWLINE ->
    print_endline ""

  | ENDMARKER -> print_endline "<<EOF>>"
  | _ -> emit t
  end;
  flush stdout

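(* Print a whole pre-token list, warning when the list is empty. *)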
let print_pre_tokens ts =
  if length ts = 0
  then print_string "<Empty pretoken list>";
  print_string " 1: ";
  iter print_pre_token ts

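(* Pretty-print a token list: each NEWLINE ends the current output line and
   starts the next one with a right-aligned line counter; braces adjust the
   indentation level. *)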
let print_tokens ts =
  let lineno = ref 0 in
  let indent = ref 0 in
  let emit t =
    print_string ((Flx_prelex.string_of_token t) ^ " ")
  and emit_eol t =
    print_endline t;
    incr lineno; (* count output lines *)
    (* right-align the line number in a 4 character field; the padding
       also keeps the String.sub start index non-negative for small
       numbers *)
    let s' = "    " ^ (string_of_int !lineno) in
    let n = String.length s' in
    print_string ((String.sub s' (n-4) 4) ^ ": ");
    for i = 0 to !indent - 1 do print_string " " done
  in
  let print_token t =
    begin match t with
    | NEWLINE ->
      emit_eol "//"
    | LBRACE _ ->
      incr indent;
      emit_eol " {"
    | RBRACE _ ->
      decr indent;
      emit_eol "}"
    | ENDMARKER -> emit_eol "#<<EOF>>"
    | _ -> emit t
    end;
    flush stdout
  in
  iter print_token ts
;;

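(* Adapt a token list to the [Lexing.lexbuf -> token] interface expected by
   ocamlyacc parsers.  The lexbuf passed to each method is a dummy: tokens
   come from the saved list.  A pristine copy of the list is retained so
   that error reporting and backtracking can index into the original
   stream. *)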
class tokeniser t =
object(self)
  val mutable tokens = []
  val mutable tokens_copy = []
  val mutable current_token_index = 0
  initializer tokens <- t; tokens_copy <- t

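  (* Return the next token without consuming it; the lexbuf argument is
     a placeholder and is ignored. *)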
  method token_peek (dummy :Lexing.lexbuf) =
    hd tokens

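  (* Consume and return the next token.  A USER_STATEMENT_KEYWORD token is
     rewritten into a USER_STATEMENT_DRIVER carrying a closure that parses
     the statement's body on demand. *)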
  method token_src (dummy :Lexing.lexbuf) =
    let tmp = hd tokens in
    tokens <- tl tokens;
    current_token_index <- current_token_index + 1;
    match tmp with
    | USER_STATEMENT_KEYWORD (sr,s,tkss,nonterminals) ->
      (*
      print_endline ("TRANSLATING USER STATEMENT KEYWORD " ^ s);
      *)
      let f = fun () -> self#parse_user_statement s (slift sr) tkss nonterminals in
      USER_STATEMENT_DRIVER (sr,s,f)
    | _ -> tmp

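  (* Push a token back onto the stream; used when a sub-parser such as
     exprx or statementsx reads one token past the phrase it matched. *)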
  method put_back (x:token) =
    tokens <- x :: tokens;
    current_token_index <- current_token_index - 1

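  (* Source location of the token at the current position. *)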
  method get_loc =
    let token = nth tokens_copy current_token_index in
    slift (Flx_prelex.src_of_token token)

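  (* Dump a window of up to 20 tokens either side of the failure point,
     together with the file, line and column of the offending token. *)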
  method report_syntax_error =
    print_endline "";
    print_endline "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";
    let n = length tokens_copy in
    let first = max 0 (current_token_index - 20)
    and last = min (n-1) (current_token_index + 20)
    and slist = ref [] in
    for i = first to current_token_index - 1 do
      slist := !slist @ [nth tokens_copy i]
    done;
    print_tokens !slist;
    print_endline "";

    let j =
      if length tokens_copy = current_token_index
      then begin
        print_string "Unexpected End Of File";
        current_token_index - 1
      end else begin
        print_string "Syntax Error before token ";
        print_string (string_of_int current_token_index);
        current_token_index
      end
    in
    let token = nth tokens_copy j in
    let sr = ref (Flx_prelex.src_of_token token) in
    let file,line,scol,_ecol = !sr in
    if line <> 0 || j = 0 then
      print_endline
      (
        " in " ^ file ^
        ", line " ^ string_of_int line ^
        " col " ^ string_of_int scol
      )
    else begin
      (* the failing token has no location: report the previous one *)
      let token = nth tokens_copy (j-1) in
      sr := Flx_prelex.src_of_token token;
      let file,line,scol,_ecol = !sr in
      print_endline
      (
        " in " ^ file ^
        ", after line " ^ string_of_int line ^
        " col " ^ string_of_int scol
      )
    end;

    slist := [];
    for i = current_token_index to last do
      slist := !slist @ [nth tokens_copy i]
    done;
    print_tokens !slist;
    print_endline "";
    print_endline "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";
    flush stdout;
    (*
    clierr (slift (!sr)) "Syntax Error";
    ()
    *)

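  (* Entry point invoked (via the closure built in token_src) when the
     parser reaches a user-defined statement keyword: parse the statement
     body according to its grammar and wrap the result. *)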
  method parse_user_statement
    (name:string)
    (sr:range_srcref)
    (tokss: (token list * ast_term_t) list)
    (nonterminals: (string,(token list * ast_term_t) list) Hashtbl.t)
  : statement_t =
    let term = self#parse_alternatives name sr tokss nonterminals in
    `AST_user_statement (sr,name,term)

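  (* Try each production of a user-defined grammar rule in turn.  The token
     stream and index are saved first; on an RDP_match_fail they are
     restored before the next alternative is tried, so every alternative
     sees identical input.  A sole alternative is handled specially so its
     failure propagates with the original source references. *)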
  method private parse_alternatives
    (name:string)
    (sr:range_srcref)
    (tokss:(token list * ast_term_t) list)
    (nonterminals: (string,(token list * ast_term_t) list) Hashtbl.t)
  : ast_term_t =

    (* save state for backtracking *)
    let saved_tokens = tokens in
    let saved_token_index = current_token_index in
    let rec aux tokss = match tokss with
    | (toks,term) :: tail ->
      begin
        try
          `Apply_term (term,self#parse_production name sr toks nonterminals)
        with
        | RDP_match_fail _ ->
          (* backtrack for next try *)
          (*
          print_endline "DEBUG: match fail caught (backtrack exception)";
          *)
          tokens <- saved_tokens;
          current_token_index <- saved_token_index;
          aux tail
        | x ->
          print_endline ("RDP: unexpected error: " ^ Printexc.to_string x);
          failwith "RDP: unexpected error!"
      end
    | [] ->
      (*
      print_endline "DEBUG: raise alternatives exhausted";
      *)
      rdp_alternatives_exhausted sr
        ("Syntax error matching user statement " ^ name)
    in
    match tokss with
    | [] -> clierr sr "Whoops, no alternatives??"
    | [toks,term] ->
      begin try
        `Apply_term (term,self#parse_production name sr toks nonterminals)
      with RDP_match_fail (sr1,sr2,s) ->
        (*
        print_endline "RD match failure on sole alternative";
        *)
        rdp_match_fail sr1 sr2 s
      end
    | xs -> aux xs

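  (* Match one production: walk its token template left to right.  The
     placeholder tokens EXPRESSION, STATEMENT, STATEMENTS, IDENT and the
     literal placeholders invoke the corresponding Flx_parse entry points;
     any other token is either a nonterminal name (looked up in the
     hashtable) or a concrete keyword/symbol that must match the input
     exactly.  Returns the matched terms in source order. *)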
  method private parse_production
    (name:string)
    (sr:range_srcref)
    (toks:token list)
    (nonterminals: (string,(token list * ast_term_t) list) Hashtbl.t)
  : ast_term_t list =

    let dummy_lexbuf = Lexing.from_string "blah" in
    let rec aux toks res = match toks with
    | h :: t ->
      begin match h with
      | EXPRESSION _ ->
        (*
        print_endline "Matching expression ..";
        *)
        let e,tk =
          try exprx self#token_src dummy_lexbuf
          with ParseError _ -> rdp_match_fail sr self#get_loc "parsing exprx"
        in
        (*
        print_endline (
          "Expression matched, stopped by " ^
          Flx_prelex.string_of_token tk
        );
        *)
        self#put_back tk;
        aux t (`Expression_term e :: res)

      | STATEMENT _ ->
        (*
        print_endline "Matching statement ..";
        *)
        let s =
          try statement self#token_src dummy_lexbuf
          with ParseError _ -> rdp_match_fail sr self#get_loc "parsing statement"
        in
        aux t (`Statement_term s :: res)

      | STATEMENTS _ ->
        (*
        print_endline "Matching statements ..";
        *)
        let s,tk =
          try statementsx self#token_src dummy_lexbuf
          with ParseError _ -> rdp_match_fail sr self#get_loc "parsing statementsx"
        in
        self#put_back tk;
        aux t (`Statements_term s :: res)

      | IDENT _ ->
        (*
        print_endline "Matching ident ..";
        *)
        let tok' = self#token_src dummy_lexbuf in
        begin match tok' with
        | NAME (_,s) ->
          aux t (`Identifier_term s :: res)
        | _ ->
          rdp_match_fail sr self#get_loc
          (
            "User statement: identifier required, got " ^
            Flx_prelex.string_of_token tok'
          )
        end

      | INTEGER_LITERAL _ ->
        let tok' = self#token_src dummy_lexbuf in
        begin match tok' with
        | INTEGER (sr,kind,vl) ->
          let j = `AST_literal (slift sr,`AST_int (kind,vl)) in
          aux t (`Expression_term j :: res)
        | _ ->
          rdp_match_fail sr self#get_loc
          (
            "User statement: integer required, got " ^
            Flx_prelex.string_of_token tok'
          )
        end

      | FLOAT_LITERAL _ ->
        let tok' = self#token_src dummy_lexbuf in
        begin match tok' with
        | FLOAT (sr,kind,vl) ->
          let j = `AST_literal (slift sr,`AST_float (kind,vl)) in
          aux t (`Expression_term j :: res)
        | _ ->
          rdp_match_fail sr self#get_loc
          (
            "User statement: float required, got " ^
            Flx_prelex.string_of_token tok'
          )
        end

      | STRING_LITERAL _ ->
        let tok' = self#token_src dummy_lexbuf in
        begin match tok' with
        | STRING (sr,s) ->
          let j = `AST_literal (slift sr,`AST_string s) in
          aux t (`Expression_term j :: res)
        | CSTRING (sr,s) ->
          let j = `AST_literal (slift sr,`AST_cstring s) in
          aux t (`Expression_term j :: res)
        | WSTRING (sr,s) ->
          let j = `AST_literal (slift sr,`AST_wstring s) in
          aux t (`Expression_term j :: res)
        | USTRING (sr,s) ->
          let j = `AST_literal (slift sr,`AST_ustring s) in
          aux t (`Expression_term j :: res)
        | _ ->
          rdp_match_fail sr self#get_loc
          (
            "User statement: string required, got " ^
            Flx_prelex.string_of_token tok'
          )
        end

      | tok ->
        let s = Flx_prelex.string_of_token tok in
        (*
        print_endline ("Checking if " ^ s ^ " is a nonterminal");
        *)
        let alts =
          try Some (Hashtbl.find nonterminals s)
          with Not_found -> None
        in
        begin match alts with
        | Some productions ->
          (*
          print_endline ("FOUND NONTERMINAL " ^ s);
          *)
          let result =
            try self#parse_alternatives s sr productions nonterminals
            with RDP_alternatives_exhausted (sr2,s) ->
              rdp_match_fail sr sr2 s
          in
          aux t (result :: res)

        | None ->
          (*
          print_endline "Nope, not a non-terminal";
          *)
          let tok' = self#token_src dummy_lexbuf in
          let s' = Flx_prelex.string_of_token tok' in
          (*
          print_endline ("Matching other token " ^ s ^ " with " ^ s');
          *)
          if s = s' then
            aux t (`Keyword_term s :: res)
          else rdp_match_fail sr self#get_loc
            (
              "Syntax Error in user statement: " ^
              "Failed to match keyword or symbol " ^
              s ^ ", got " ^ s' ^ " instead"
            )
        end
      end

    | [] -> rev res
    in aux toks []

end
;;

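(* The type of an ocamlyacc entry point: a function from a token supplier
   and a (dummy) lexbuf to a parse result. *)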
type 'a parser_t =
  (Lexing.lexbuf -> Flx_parse.token) ->
  Lexing.lexbuf ->
  'a

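(* Run an ocamlyacc entry point over a pre-lexed token list.  On any parse
   failure the tokeniser dumps a window around the failure point, then the
   exception is re-raised (unrecognised exceptions become a generic
   ParseError).

   A minimal usage sketch, assuming [Flx_parse.compilation_unit] is a parser
   entry point and [toks] is a token list produced by the lexer -- both
   names are illustrative, not defined in this file:

     let tree = parse_tokens Flx_parse.compilation_unit toks
*)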
let parse_tokens (parser:'a parser_t) (tokens: Flx_parse.token list) =
  let toker = new tokeniser tokens in
  try
    parser (toker#token_src) (Lexing.from_string "dummy")
  with
  | Flx_exceptions.ClientError _
  | Flx_exceptions.ClientError2 _
  | Flx_exceptions.ClientErrorn _ as x ->
    (*
    print_endline ("got client error from parse..");
    *)
    toker#report_syntax_error;
    raise x

  | Flx_exceptions.ParseError _ as x ->
    (*
    print_endline ("got ParseError from parse..");
    *)
    toker#report_syntax_error;
    raise x

  | Flx_exceptions.RDP_match_fail _ as x ->
    (*
    print_endline ("got RDP_match_fail from parse..");
    *)
    toker#report_syntax_error;
    raise x

  | _ ->
    (*
    print_endline "Got unknown error from parse..";
    *)
    toker#report_syntax_error;
    raise (Flx_exceptions.ParseError "Parsing Tokens")