5.53. Inlining

To make this work, we need a theorem. First, the call rule is:
A procedure may only call a child of an ancestor.
Note an ancestor is itself or a parent of any ancestor: that is, a procedure is an ancestor of itself. A parentless toplevel procedure is considered a child of a dummy root to make this simple formulation work.

It is clear we can inline any sibling by copying its body, and duplicating any children -- variables and nested procedures included. This is because any references to its parent will go through from the caller, since they have the same parent.

Clearly this result extends to any child of any parent.

Start ocaml section to src/flx_inline.mli[1 /1 ]
     1: # 27 "./lpsrc/flx_inline.ipk"
     2: open Flx_ast
     3: open Flx_types
     4: open Flx_mtypes1
     5: open Flx_mtypes2
     6: open Flx_call
     7: 
     8: val heavy_inlining:
     9:   sym_state_t ->
    10:   (bid_t, bid_t list) Hashtbl.t *
    11:   fully_bound_symbol_table_t ->
    12:   unit
    13: 
End ocaml section to src/flx_inline.mli[1]
Start ocaml section to src/flx_inline.ml[1 /1 ]
     1: # 41 "./lpsrc/flx_inline.ipk"
     2: open Flx_util
     3: open Flx_ast
     4: open Flx_types
     5: open Flx_print
     6: open Flx_mtypes1
     7: open Flx_mtypes2
     8: open Flx_typing
     9: open Flx_mbind
    10: open Flx_srcref
    11: open List
    12: open Flx_unify
    13: open Flx_treg
    14: open Flx_generic
    15: open Flx_maps
    16: open Flx_exceptions
    17: open Flx_use
    18: open Flx_child
    19: open Flx_reparent
    20: open Flx_spexes
    21: open Flx_foldvars
    22: 
    23: 
    24: module BidSet = IntSet
    25: 
    26: let intset_of_list ls =
    27:   fold_left (fun s i -> IntSet.add i s) IntSet.empty ls
    28: 
    29: 
    30: let string_of_vs vs =
    31:   "[" ^ catmap "," (fun (s,i)->s^"<"^si i^">") vs ^ "]"
    32: 
    33: (* varmap is the *typevariable* remapper,
    34:  revariable remaps indices
    35: *)
    36: let ident x = x
    37: 
    38: (* Heavy inlining routine. This routine can inline
    39: any procedure. The basic operation is emit the body
    40: of the target procedure. We have to do the following to
    41: make it all work.
    42: 
    43: (1) Each declared label is replaced by a fresh one,
    44: and all jumps to these labels modified accordingly.
    45: 
    46: (2) Variables are replaced by fresh ones. This requires
    47: making additions to the output bound tables. References
    48: to the variables are modified. Note the parent is the
    49: caller now.
    50: 
    51: (3) Paremeters are replaced like variables, initialised
    52: by the arguments.
    53: 
    54: (4) Any type variables instantiated by the call must
    55: also be instantiated in body expressions, as well as
    56: the typing of any generated variables.
    57: 
    58: (5) If the procedure has any nested procedures, they
    59: also must be replaced in toto by fresh ones, reparented
    60: to the caller so that any calls to them will access
    61: the fresh variables in the caller.
    62: 
    63: Note that the cache of children of the caller will
    64: be wrong after the inlining (it may have acquired new
    65: variables or procedure children).
    66: 
    67: Note that this inlining procedure is NOT recursive!
    68: Its a flat one level inlining. This ensures recursive
    69: calls don't cause an infinite unrolling, and hopefully
    70: prevent gross bloat.
    71: *)
    72: 
    73: let mk_label_map syms exes =
    74:   let h = Hashtbl.create 97 in
    75:   let aux = function
    76:   | `BEXE_label (sr,s) ->
    77:     let n = !(syms.counter) in
    78:     incr syms.counter;
    79:     let s' =  "_" ^ si n in
    80:     Hashtbl.add h s s'
    81:   | _ -> ()
    82:   in
    83:     iter aux exes;
    84:     h
    85: 
    86: let idt t = t
    87: 
    88: let is_var bbdfns i =
    89:   match Hashtbl.find bbdfns i with
    90:   | _,_,_,`BBDCL_var _ -> true
    91:   | _ -> false
    92: 
    93: let is_simple_expr syms e =
    94:   print_endline ("Is " ^ sbe syms.dfns e ^ " simple?");
    95:   match e with
    96:   | `BEXPR_ref _,_ -> print_endline "YES"; true
    97:   | _ -> print_endline "NO"; false
    98: 
    99: (* CALL LIFTING. What this does is transform a call:
   100: 
   101:   call (f a) arg
   102: 
   103:   by replacing it with the body of f,
   104:   in which every
   105: 
   106:   return x
   107: 
   108:   is replaced by
   109: 
   110:   call x arguemnt
   111: 
   112:   This converts  f from a function returning
   113:   a procedure, to a procedure which executes that
   114:   procedure.
   115: 
   116:   NOTE: this is a special case of the distributive law.
   117: 
   118:   f (if c then a else b) v => if c then f a v else f b v
   119: 
   120: *)
   121: 
   122: let call_lifting syms (uses,child_map,bbdfns) caller caller_vs callee ts a argument =
   123:   (*
   124:   print_endline "DOING CALL LIFTING";
   125:   *)
   126:   let id,parent,sr,entry = Hashtbl.find bbdfns callee in
   127:   match entry with
   128:   | `BBDCL_function (props,vs,(ps,traint),ret,exes) ->
   129:     (*
   130:     print_endline ("Found procedure "^id^": Inline it!");
   131:     *)
   132:     let relabel = mk_label_map syms exes in
   133:     let varmap =
   134:       try mk_varmap vs ts
   135:       with Failure x ->
   136:         print_endline "[call_lifting] FAIL mk_varmap";
   137:         raise (Failure x)
   138:     in
   139:     let callee_vs_len = length vs in
   140: 
   141:     let revariable = reparent_children
   142:       syms (uses,child_map,bbdfns)
   143:       caller_vs callee_vs_len callee (Some caller) relabel varmap false
   144:     in
   145:     (* use the inliner to handle the heavy work *)
   146:     let body =
   147:       gen_body syms (uses,child_map,bbdfns) id varmap ps relabel revariable
   148:       exes a sr caller callee caller_vs callee_vs_len `Lazy props
   149:     in
   150: 
   151:     (* replace all function returns with tailed calls *)
   152:     let body2 = ref [] in
   153:     let n = !(syms.counter) in incr (syms.counter);
   154:     let end_label = "_end_call_lift_" ^ si n in
   155:     body2 := `BEXE_label (sr,end_label) :: !body2;
   156:     iter
   157:       (function
   158:       | `BEXE_fun_return (sr,e) ->
   159:         (* NOTE REVERSED ORDER *)
   160:         let call_instr =
   161:           (
   162:           (*
   163:           match e with
   164:           | `BEXPR_closure (i,ts),_ ->
   165:             `BEXE_call_direct (sr,i,ts,argument)
   166:           | `BEXPR_method_closure (obj,i,ts),_ ->
   167:             `BEXE_call_method_direct (sr,obj,i,ts,argument)
   168:           | _ ->
   169:           *)
   170:             `BEXE_call (sr,e,argument)
   171:           )
   172:         in
   173:         body2 := `BEXE_goto (sr,end_label) :: !body2;
   174:         body2 := call_instr :: !body2;
   175:       | `BEXE_yield _ ->
   176:         syserr sr "Attempt to inline generator containing a yield"
   177:       | x -> body2 := x::!body2
   178:       )
   179:       body
   180:     ;
   181:     (*
   182:     print_endline (
   183:      catmap "\n" (string_of_bexe syms.dfns 0) !body2
   184:     )
   185:     ;
   186:     *)
   187:     revariable,!body2 (* forward order *)
   188: 
   189:   | _ -> assert false
   190: 
   191: let inline_tail_apply syms (uses,child_map,bbdfns) caller caller_vs callee ts a =
   192:   (* TEMPORARY .. this should be allowed for unrolling but we do not do that yet *)
   193:   assert (callee <> caller);
   194:   let id,parent,sr,entry = Hashtbl.find bbdfns callee in
   195:   match entry with
   196:   | `BBDCL_function (props,vs,(ps,traint),ret,exes) ->
   197:     let id2,_,_,_ = Hashtbl.find bbdfns caller in
   198:     (*
   199:     print_endline
   200:     (
   201:       "TAIL Inlining function "^id^
   202:       "<"^si callee^">"^
   203:       "[" ^ catmap "," (sbt syms.dfns) ts ^ "] into " ^ id2 ^ "<" ^ si caller ^">"
   204:     );
   205:     *)
   206:     let relabel = mk_label_map syms exes in
   207:     let varmap =
   208:       try mk_varmap vs ts
   209:       with Failure x ->
   210:         print_endline "[inline_tail_apply] FAIL mk_varmap";
   211:         raise (Failure x)
   212:     in
   213:     let callee_vs_len = length vs in
   214: 
   215:     let revariable = reparent_children
   216:       syms (uses,child_map,bbdfns)
   217:       caller_vs callee_vs_len callee (Some caller) relabel varmap false
   218:     in
   219: 
   220:     (* use the inliner to handle the heavy work *)
   221:     let body =
   222:       gen_body syms (uses,child_map,bbdfns) id varmap ps relabel revariable
   223:       exes a sr caller callee caller_vs callee_vs_len `Lazy props
   224:     in
   225:     revariable,rev body
   226: 
   227:   | _ -> assert false
   228: 
   229: let inline_function syms (uses,child_map,bbdfns) caller caller_vs callee ts a varindex =
   230:   let id,parent,sr,entry = Hashtbl.find bbdfns callee in
   231:   match entry with
   232:   | `BBDCL_function (props,vs,(ps,traint),ret,exes) ->
   233:     (*
   234:     print_endline
   235:     (
   236:       "Inlining function "^id^
   237:       "<"^si callee^">"^
   238:       "[" ^ catmap "," (sbt syms.dfns) ts ^ "]"^
   239:       " retvar="^ si varindex ^
   240:       "\nvs = " ^ catmap "," (fun (s,i) -> s ^ "<" ^ si i ^ ">") vs
   241:     );
   242:     *)
   243:     let relabel = mk_label_map syms exes in
   244:     let varmap =
   245:       try mk_varmap vs ts
   246:       with Failure x ->
   247:         print_endline "[inline_function] FAIL mk_varmap";
   248:         raise (Failure x)
   249:     in
   250:     let callee_vs_len = length vs in
   251: 
   252:     let revariable = reparent_children
   253:       syms (uses,child_map,bbdfns)
   254:       caller_vs callee_vs_len callee (Some caller) relabel varmap false
   255:     in
   256: 
   257:     (* use the inliner to handle the heavy work *)
   258:     let body =
   259:       gen_body syms (uses,child_map,bbdfns) id varmap ps relabel revariable
   260:       exes a sr caller callee caller_vs callee_vs_len `Lazy props
   261:     in
   262: 
   263:     (*
   264:     print_endline "Replace returns with inits";
   265:     *)
   266:     (* replace all function returns with variable initialisations *)
   267:     let body2 = ref [] in
   268:     let n = !(syms.counter) in incr (syms.counter);
   269:     let end_label = "_end_inline_" ^ Flx_name.cid_of_flxid id ^ "_"^ si n in
   270:     let t = ref None in
   271:     let end_label_used = ref false in
   272:     iter
   273:       (function
   274:       | `BEXE_fun_return (sr,((_,t') as e)) ->
   275:         t := Some t';
   276:         if not (!body2 == []) then begin
   277:           body2 := `BEXE_goto (sr,end_label) :: !body2;
   278:           end_label_used := true
   279:         end
   280:         ;
   281:         let call_instr = `BEXE_init (sr,varindex,e) in
   282:         (*
   283:         print_endline ("Replacing return with init: " ^ string_of_bexe syms.dfns 0 call_instr);
   284:         *)
   285:         body2 := call_instr :: !body2;
   286: 
   287:       | `BEXE_yield _ ->
   288:         syserr sr "Attempt to inline generator with a yield"
   289: 
   290:       | x -> body2 := x::!body2
   291:       )
   292:       body
   293:     ;
   294:     (* Ugghhh *)
   295:     if !end_label_used then
   296:       body2 := !body2 @ [`BEXE_label (sr,end_label)]
   297:     ;
   298:     (*
   299:     print_endline (
   300:      catmap "\n" (string_of_bexe syms.dfns 0) !body2
   301:     )
   302:     ;
   303:     *)
   304:     revariable,!body2 (* forward order *)
   305: 
   306:   | _ -> assert false
   307: 
   308: let is_generator bbdfns i =
   309:   let id,parent,sr,entry = Hashtbl.find bbdfns i in
   310:   match entry with
   311:   | `BBDCL_fun (props,_,_,_,_,_,_)
   312:   | `BBDCL_function (props,_,_,_,_)
   313:     when mem `Generator props
   314:     -> true
   315:   | _ -> false
   316: 
   317: (* note u sr e must return exes in reverse order, this
   318:   function however returns exes in forward order
   319: *)
   320: let expand_exe syms bbdfns u exe =
   321:   let xs =
   322:     (*
   323:     print_endline ("EXPAND EXE " ^ string_of_bexe syms.dfns 0 exe);
   324:     *)
   325:     match exe with
   326:     | `BEXE_axiom_check _ -> assert false
   327:     | `BEXE_call_prim (sr,i,ts,e2) -> assert false
   328:       (*
   329:       let e,xs = u sr e2 in
   330:       `BEXE_call_prim (sr,i,ts,e) :: xs
   331:       *)
   332: 
   333:     | `BEXE_call_stack (sr,i,ts,e2) -> assert false
   334: 
   335:     | `BEXE_call_direct (sr,i,ts,e2) -> assert false
   336:       (*
   337:       let e,xs = u sr e2 in
   338:       `BEXE_call_direct (sr,i,ts,e) :: xs
   339:       *)
   340: 
   341:     | `BEXE_call_method_direct (sr,e1,i,ts,e2) ->
   342:       let e1,xs1 = u sr e1 in
   343:       let e2,xs2 = u sr e2 in
   344:       `BEXE_call_method_direct (sr,e1,i,ts,e2) :: xs2 @ xs1
   345: 
   346:     | `BEXE_call_method_stack (sr,e1,i,ts,e2) ->
   347:       let e1,xs1 = u sr e1 in
   348:       let e2,xs2 = u sr e2 in
   349:       `BEXE_call_method_stack (sr,e1,i,ts,e2) :: xs2 @ xs1
   350: 
   351:     | `BEXE_jump_direct (sr,i,ts,e2) -> assert false
   352:       (*
   353:       let e,xs = u sr e2 in
   354:       `BEXE_jump_direct (sr,i,ts,e) :: xs
   355:       *)
   356: 
   357:     | `BEXE_apply_ctor (sr,i1,i2,ts,i3,e2) ->
   358:       let e,xs = u sr e2 in
   359:       `BEXE_apply_ctor (sr,i1,i2,ts,i3,e) :: xs
   360: 
   361:     | `BEXE_apply_ctor_stack (sr,i1,i2,ts,i3,e2) ->
   362:       let e,xs = u sr e2 in
   363:       `BEXE_apply_ctor_stack (sr,i1,i2,ts,i3,e) :: xs
   364: 
   365:     | `BEXE_assign (sr,e1,e2) ->
   366:       let e1,xs1 = u sr e1 in
   367:       let e2,xs2 = u sr e2 in
   368:       `BEXE_assign (sr,e1,e2) :: xs2 @ xs1
   369: 
   370:     | `BEXE_assert (sr,e) ->
   371:       let e,xs = u sr e in
   372:       `BEXE_assert (sr,e) :: xs
   373: 
   374:     | `BEXE_assert2 (sr,sr2,e1,e2) ->
   375:       let e1,xs1 =
   376:         match e1 with Some e -> let a,b = u sr e in Some a,b
   377:         | None -> None,[]
   378:       in
   379:       let e2,xs2 = u sr e2 in
   380:       `BEXE_assert2 (sr,sr2,e1,e2) :: xs2 @ xs1
   381: 
   382:     (* preserve call lift pattern ??*)
   383:     | `BEXE_call (sr,(`BEXPR_apply((`BEXPR_closure(i,ts),t'),e1),t),e2) ->
   384:       let e1,xs1 = u sr e1 in
   385:       let e2,xs2 = u sr e2 in
   386:       `BEXE_call (sr,(`BEXPR_apply((`BEXPR_closure(i,ts),t'),e1),t),e2) :: xs2 @ xs1
   387: 
   388:     | `BEXE_call (sr,e1,e2) ->
   389:       let e1,xs1 = u sr e1 in
   390:       let e2,xs2 = u sr e2 in
   391:       `BEXE_call (sr,e1,e2) :: xs2 @ xs1
   392: 
   393:     | `BEXE_jump (sr,e1,e2) -> assert false
   394: 
   395:     | `BEXE_loop (sr,i,e) -> assert false
   396:       (*
   397:       let e,xs = u sr e in
   398:       `BEXE_loop (sr,i,e) :: xs
   399:       *)
   400: 
   401:     | `BEXE_ifgoto (sr,e,lab) ->
   402:       let e,xs = u sr e in
   403:       `BEXE_ifgoto (sr,e,lab) :: xs
   404: 
   405:     | `BEXE_ifnotgoto (sr,e,lab) ->
   406:       let e,xs = u sr e in
   407:       `BEXE_ifnotgoto (sr,e,lab) :: xs
   408: 
   409:     (* preserve tail call pattern -- used by both
   410:        tail-rec eliminator
   411:        and by call lifter (which converts returns to calls)
   412:     *)
   413:     | `BEXE_fun_return (sr,(`BEXPR_apply((`BEXPR_closure(i,ts),t'),e),t)) ->
   414:       let e,xs = u sr e in
   415:       `BEXE_fun_return (sr,(`BEXPR_apply((`BEXPR_closure(i,ts),t'),e),t)) :: xs
   416: 
   417:     | `BEXE_fun_return (sr,e) ->
   418:       let e,xs = u sr e in
   419:       `BEXE_fun_return (sr,e) :: xs
   420: 
   421:     | `BEXE_yield (sr,e) ->
   422:       let e,xs = u sr e in
   423:       `BEXE_yield (sr,e) :: xs
   424: 
   425:     (* This case has to be handled specially, in case we already
   426:        have a simplified form, and the unravelling introduces
   427:        a gratuitous extra variable: for example
   428: 
   429:        x : = f a
   430: 
   431:        might expand to
   432: 
   433:        x' = f a
   434:        x := x'
   435: 
   436:        which is rather pointless. There is, unfortunately,
   437:        a duplicate of this check elsewhere ..
   438:     *)
   439: 
   440:     | `BEXE_init (sr,i,(`BEXPR_apply((`BEXPR_closure (j,ts),t'),e),t))
   441:       (*
   442:       when is_generator bbdfns j
   443:       *)
   444:       ->
   445:       let e,xs = u sr e in
   446:       `BEXE_init (sr,i,(`BEXPR_apply((`BEXPR_closure (j,ts),t'),e),t)) :: xs
   447: 
   448:     | `BEXE_init (sr,i,e) ->
   449:       let e,xs = u sr e in
   450:       `BEXE_init (sr,i,e) :: xs
   451: 
   452:     | `BEXE_svc _
   453:     | `BEXE_label _
   454:     | `BEXE_goto _
   455:     | `BEXE_code _
   456:     | `BEXE_nonreturn_code _
   457:     | `BEXE_proc_return _
   458:     | `BEXE_comment _
   459:     | `BEXE_nop _
   460:     | `BEXE_halt _
   461:     | `BEXE_begin
   462:     | `BEXE_end
   463:       -> [exe]
   464:   in
   465:     let xs = rev xs in
   466:     xs
   467: 
   468: let check_reductions syms exes = Flx_reduce.reduce_exes syms syms.reductions exes
   469: 
   470: let heavy_inline_call syms (uses,child_map,bbdfns)
   471:   caller caller_vs callee ts argument id sr (props, vs, (ps,traint), exes)
   472: =
   473:   (*
   474:   print_endline ("INLINING CALL to " ^ id ^"<"^ si callee^">("^sbe syms.dfns argument^")");
   475:   print_endline ("In procedure " ^ si caller ^ " with vs=" ^ string_of_vs caller_vs);
   476:   print_endline ("Callee is " ^ id ^ "<"^si callee ^ "> with ts = " ^ catmap "," (sbt syms.dfns) ts);
   477:   print_endline ("Callee vs=" ^ string_of_vs vs);
   478:   *)
   479:   let caller_vs_len = length caller_vs in
   480:   let callee_vs_len = length vs in
   481:   (*
   482:   print_endline ("In the callee and its children,");
   483:   print_endline ("The callee vs are elided and replaced by the caller vs");
   484:   print_endline ("ELIDE: first " ^ si callee_vs_len ^ ", PREPEND " ^ si caller_vs_len);
   485:   print_endline ("This works by instantiating the callee vs with the calls ts");
   486:   *)
   487:   assert(length vs = length ts);
   488: 
   489:   (*
   490:   print_endline ("Found procedure "^id^": Inline it!");
   491:   *)
   492:   let relabel = mk_label_map syms exes in
   493:   let varmap =
   494:     try mk_varmap vs ts
   495:     with Failure x ->
   496:       print_endline "[heavy_inline_call] FAIL mk_varmap";
   497:       raise (Failure x)
   498:   in
   499:   let revariable = reparent_children
   500:     syms (uses,child_map,bbdfns)
   501:     caller_vs callee_vs_len callee (Some caller) relabel varmap false
   502:   in
   503:   let xs = gen_body syms (uses,child_map,bbdfns) id
   504:     varmap ps relabel revariable exes
   505:     argument sr caller callee caller_vs callee_vs_len `Lazy props
   506:   in
   507:     revariable,rev xs (* forward order *)
   508: 
   509: let make_specialisation syms (uses,child_map,bbdfns)
   510:   caller caller_vs callee ts id sr parent props vs exes rescan_flag
   511: =
   512:   (*
   513:   print_endline ("Specialising call " ^ id ^ "<"^si callee ^ ">[" ^ catmap "," (sbt syms.dfns) ts ^"]");
   514:   print_endline ("In procedure " ^ si caller ^ " with vs=" ^ string_of_vs caller_vs);
   515:   print_endline ("Callee vs=" ^ string_of_vs vs);
   516:   *)
   517:   let caller_vs_len = length caller_vs in
   518:   let callee_vs_len = length vs in
   519: 
   520:   (*
   521:   print_endline ("In the callee and its children,");
   522:   print_endline ("The callee vs are elided and replaced by the caller vs");
   523:   print_endline ("ELIDE: first " ^ si callee_vs_len ^ ", PREPEND " ^ si caller_vs_len);
   524:   print_endline ("This works by instantiating the callee vs with the calls ts");
   525:   *)
   526:   assert(length vs = length ts);
   527: 
   528:   (*
   529:   print_endline ("Found procedure "^id^": Inline it!");
   530:   *)
   531:   let relabel = mk_label_map syms exes in
   532:   let varmap =
   533:     try mk_varmap vs ts
   534:     with Failure x ->
   535:       print_endline "[make_specialisation] FAIL mk_varmap";
   536:       raise (Failure x)
   537:   in
   538:   let k,ts' =
   539:     specialise_symbol
   540:       syms (uses,child_map,bbdfns)
   541:       caller_vs callee_vs_len callee ts parent relabel varmap rescan_flag
   542:    in
   543:    (*
   544:    print_endline ("Specialised to " ^ id ^ "<"^si k ^ "> with ts = " ^ catmap "," (sbt syms.dfns) ts');
   545:    *)
   546:    k,ts'
   547: 
   548: (* Dependency analyser. This should be generalised,
   549: but for now we only use it in tail calls.
   550: 
   551: We wish to discover what *local* vals an expression e in
   552: some routine i depends on.
   553: 
   554: These are (a) the symbols manifestly used in the expression,
   555: and (b) any variable used by any function that is called.
   556: 
   557: We can calculate this, expensively as the union of the
   558: use closures of each symbol in the expression intersected
   559: with the candidate locals.
   560: *)
   561: 
   562: 
   563: (* note returns exes in reverse order *)
   564: (* This routine analyses an expression to see if it has  the form
   565: 
   566:   f a
   567: 
   568: If so it is replaced by v and a statement v = f a, then
   569: this initialisation is replaced by the body of f
   570: with a replacing the parameter,
   571: where returns are replaced by initialisations of v
   572: and a goto the end of the routine.
   573: 
   574: Then in the special case the last line of the body
   575: resolves to the form
   576: 
   577:   v = e'
   578: 
   579: the expression is replaced by e'. This works by a quirk,
   580: that this code must have come from a sole tail return
   581: in the body. If there were more than one return,
   582: prior returns would be a return to a label after it,
   583: however the inliner doesn't generate the label at the
   584: end for a sole tail return, so we can assume this
   585: is the only return.
   586: 
   587: The result leaves an expression in a place where
   588: a tail call might be recognized, avoiding a temporary
   589: which prevents simplistic patterns representing data
   590: and control flow. Although its a hack, it is important
   591: to ensure trivial functions have no overhead.
   592: 
   593: Note this routine, in itself, does NOT rescan anything:
   594: there is no recursion -- other than the recursive traversal
   595: of the original expression, done by the 'aux' function.
   596: *)
   597: 
   598: let inlining_complete bbdfns i =
   599:   let _,_,_,entry = Hashtbl.find bbdfns i in
   600:   match entry with
   601:   | `BBDCL_function (props,_,_,_,_)
   602:   | `BBDCL_procedure (props,_,_,_) ->
   603:     mem `Inlining_complete props
   604:   | `BBDCL_proc _
   605:   | `BBDCL_fun _
   606:     -> true
   607: 
   608:   | _ -> assert false
   609: 
   610: 
   611: (*
   612: 
   613: 
   614: See post in felix-language. The problem is knowing
   615: when to inline a function: typeclass virtual function
   616: default methods can only be inlined if there is an instance
   617: of the typeclass AND the virtual is not overridden in the
   618: instance.
   619: 
   620: Proposed algorithm.
   621: 
   622: 1. Check if the function is virtual.
   623: 
   624: 2. If so, find its parent, which is the typeclass
   625: 
   626: 3. strip the tail off the instantiating ts so it
   627:    matches the length of the typeclass vs list
   628:    (in case the function is polymorphic, the function's
   629:     private type arguments will be the remaining ones)
   630: 
   631: 3. Using a table of pairs:
   632: 
   633:         (typeclass, (instance, (vs,ts)))
   634: 
   635: discover if there is an instance. This is actually hard:
   636: the check actually requires seeing if the given ts specialises
   637: one of the ts in the above table for the given typeclass.
   638: The instantiation's vs is required too, since the ts of the
   639: typeclass have to be mapped to the instance view.
   640: 
   641: 4. IF there is a match:
   642: 
   643: 4a. try to find an instance  of the virtual function.
   644: 
   645: 4b. If none is found, then inline the virtual function
   646:     default body
   647: 
   648: 4c.  otherwise inline the instance.
   649: 
   650: 5. otherwise (no instance) leave the call alone.
   651: 
   652: IF we know the code is fully monorphised then 5 becomes
   653: instead an error.
   654: 
   655: Note that Felix currently DOES NOT detect this error.
   656: If the function has a default, it will be used even
   657: if there is no instance. This will result in either
   658: an infinite recursion at run time OR lead to another
   659: virtual that has no body, resulting in an error diagnostic.
   660: 
   661: But the infinite recursion is also possible even if there
   662: is an instance .. so nothing is lost here.. :)
   663: 
   664: *)
   665: 
   666: let virtual_check syms (bbdfns:fully_bound_symbol_table_t) sr i ts =
   667:   let id,parent,callee_sr,entry = Hashtbl.find bbdfns i in
   668:   (*
   669:   print_endline ("virtual check Examining call to " ^ id ^ "<" ^ si i ^ ">");
   670:   *)
   671:   match entry with
   672:   | `BBDCL_fun (props,_,_,_,_,_,_)
   673:   | `BBDCL_function (props,_,_,_,_)
   674:   | `BBDCL_proc (props,_,_,_,_)
   675:   | `BBDCL_procedure (props,_,_,_) when mem `Virtual props ->
   676:     (*
   677:     print_endline ("Examining call to virtual " ^ id);
   678:     *)
   679:     let parent = match parent with | Some p -> p | None -> assert false in
   680:     let tcvslen =
   681:       try
   682:         let {id=pid; vs=vs; symdef=entry} = Hashtbl.find syms.dfns parent in
   683:         match entry with
   684:         | `SYMDEF_typeclass ->
   685:           (*
   686:           print_endline ("Found parent " ^ pid ^ "<" ^ si i ^ ">");
   687:           *)
   688:           List.length (fst vs)
   689:         | _ ->
   690:           print_endline "Woops, parent isn't typeclass?";
   691:           assert false
   692:       with Not_found ->
   693:         print_endline ("Parent typeclass " ^ si parent ^ " not found!");
   694:         assert false
   695:     in
   696:     let tslen = List.length ts in
   697:     (*
   698:     print_endline ("Vs len of parent = " ^ si tcvslen);
   699:     print_endline ("ts len           = " ^ si tslen);
   700:     *)
   701:     if tcvslen > tslen then
   702:       clierr sr "Not enough type arguments for typeclass"
   703:     ;
   704:     let fts = rev (list_prefix (rev ts) (tslen - tcvslen)) in
   705:     let ts = list_prefix ts tcvslen in
   706:     let instances =
   707:       try Hashtbl.find syms.instances_of_typeclass parent
   708:       with Not_found ->
   709:         (*
   710:         print_endline "No instances of typeclass?";
   711:         *)
   712:         (*
   713:         assert false
   714:         *)
   715:         []
   716:     in
   717:     (*
   718:     print_endline "Found some instances!";
   719:     print_endline ("ts = " ^ catmap "," (sbt syms.dfns) ts);
   720:     *)
   721:     let matches = ref [] in
   722:     iter (fun (j,(jvs,jcon,jts)) ->
   723:       (*
   724:       print_endline ("instance[" ^
   725:         catmap "," (fun (s,i) -> s^ "<"^si i^">") jvs ^ "] " ^
   726:         si j ^ "[" ^
   727:         catmap "," (sbt syms.dfns) jts ^ "]"
   728:       );
   729:       *)
   730:       (* check if the call specialises the instance. *)
   731:       let ok =
   732:         Flx_typeclass.tcinst_chk syms true i ts sr
   733:           (jvs, jcon,jts, j)
   734:       in
   735:       begin match ok with
   736:       | Some _ ->
   737:         (*
   738:         print_endline "matches";
   739:         *)
   740:         matches := j :: !matches
   741: 
   742:       | None -> (* print_endline "Doesn't match"; *)  ()
   743:       end
   744:     )
   745:     instances
   746:     ;
   747:     begin match !matches with
   748:     | [_] ->
   749:       let i',ts' =
   750:         Flx_typeclass.maybe_fixup_typeclass_instance syms bbdfns i ts
   751:       in
   752:       if i = i' then begin
   753:         (*
   754:         print_endline (id ^ " -- Dispatch to default");
   755:         *)
   756:         true,i',ts' @ fts
   757:       end else begin
   758:         (*
   759:         print_endline (id ^ " -- Dispatch to instance");
   760:         *)
   761:         true,i',ts' @ fts
   762:       end
   763:     | _ ->
   764:       (*
   765:       print_endline (id ^ " -- Dispatch unknown");
   766:       *)
   767:       false,i,ts @ fts
   768:     end
   769: 
   770:   | _ -> (* print_endline (id ^ " -- Not virtual") *) true,i,ts
   771: 
   772: let rec special_inline syms (uses,child_map,bbdfns) caller_vs caller hic excludes sr e =
   773:   (*
   774:   print_endline ("Special inline " ^ sbe syms.dfns e);
   775:   *)
   776:   let exes' = ref [] in
   777:   let id x = x in
   778:   let rec aux e =
   779:   (*
   780:   print_endline (" ... Special inline subexpr: " ^ sbe syms.dfns e);
   781:   *)
   782:   match map_tbexpr id aux id e with
   783:   | `BEXPR_closure (callee,_),_ as x ->
   784:       heavily_inline_bbdcl syms (uses,child_map,bbdfns) (callee::excludes) callee;
   785:       x
   786: 
   787:   | ((`BEXPR_apply_prim (callee,ts,a),t) as e)
   788:   | ((`BEXPR_apply_stack (callee,ts,a),t) as e)
   789:   | ((`BEXPR_apply_direct (callee,ts,a),t) as e) -> assert false
   790: 
   791:   | (((`BEXPR_apply(  (`BEXPR_closure (callee,ts),_) ,a)),t) as e)
   792:     ->
   793:       let can_inline,callee,ts = virtual_check syms bbdfns sr callee ts in
   794:       if not (mem callee excludes) then begin
   795:         heavily_inline_bbdcl syms (uses,child_map,bbdfns) (callee::excludes) callee;
   796:         let id,parent,sr,entry = Hashtbl.find bbdfns callee in
   797:         begin match entry with
   798: 
   799: 
   800:         (* THIS CODE IS PROBABLY IN THE WRONG PLACE!
   801: 
   802:            The technique is ALSO probably insecure because
   803:            it only works with direct applications .. but
   804:            the overloading works with closures too.
   805: 
   806:            To make this actually work properly requires a change
   807:            to the type system.
   808: 
   809:            In addition, it isn't clear the lifted call is optimised
   810:            as it should be (it should be inlined of course).
   811:            In particular, its argument may also include calls
   812:            needing lifting.
   813:         *)
   814: 
   815:         (* This code must ONLY be triggered by an inner (unlifted) application
   816:            so the detector must not be recursively applied to RHS of initialisation
   817:            x = f a
   818:            where f is the generator, since that form is properly lifted ..
   819:            otherwise we get a chain:
   820:            x1 = f a
   821:            x2 = x1
   822:            x3 = x2
   823:            ...
   824:         *)
   825:         | `BBDCL_fun (props,_,_,_,_,_,_)
   826:         | `BBDCL_function (props,_,_,_,_)
   827:           when mem `Generator props
   828:           ->
   829:           (*
   830:           print_endline ("Unravel generator " ^ id);
   831:           *)
   832: 
   833:           (* create a new variable *)
   834:           let urv = !(syms.counter) in incr (syms.counter);
   835:           let urvid = "_genout_urv" ^ si urv in
   836:           add_child child_map caller urv;
   837:           add_use uses caller urv sr;
   838:           let entry = `BBDCL_var (caller_vs,t) in
   839:           Hashtbl.add bbdfns urv (urvid,Some caller,sr,entry);
   840: 
   841:           (* set variable to function appliction *)
   842:           let cll = `BEXE_init (sr,urv,e) in
   843:           exes' := cll :: !exes';
   844: 
   845: 
   846:           (* replace application with the variable *)
   847:           let ts = map (fun (_,i)-> `BTYP_var (i,`BTYP_type 0)) caller_vs in
   848:           `BEXPR_name (urv,ts),t
   849: 
   850:         | `BBDCL_function (props,vs,(ps,traint),ret,exes) ->
   851:           (* TEMPORARY FIX! *)
   852: 
   853:           (* create a new variable *)
   854:           let urv = !(syms.counter) in incr (syms.counter);
   855:           let urvid = "_urv" ^ si urv in
   856:           add_child child_map caller urv;
   857:           add_use uses caller urv sr;
   858:           let entry = `BBDCL_val (caller_vs,t) in
   859:           Hashtbl.add bbdfns urv (urvid,Some caller,sr,entry);
   860: 
   861:           (* set variable to function appliction *)
   862:           let cll = `BEXE_init (sr,urv,e) in
   863:           exes' := cll :: !exes';
   864: 
   865: 
   866:           (* replace application with the variable *)
   867:           let ts = map (fun (_,i)-> `BTYP_var (i,`BTYP_type 0)) caller_vs in
   868:           `BEXPR_name (urv,ts),t
   869: 
   870: 
   871: 
   872:           (*
   873:           (*
   874:           print_endline ("Consider inlining " ^ id);
   875:           *)
   876:           (*
   877:           if is_child child_map caller callee then
   878:             print_endline ("Callee "^si callee ^" is child of caller " ^ si caller ^ " EXCLUDE LEN= " ^ si (length excludes))
   879:           ;
   880:           *)
   881:           if can_inline &&
   882:             not (mem `NoInline props) &&
   883:             (
   884:               mem `Inline props ||
   885:               length exes <= syms.compiler_options.max_inline_length
   886:             ) &&
   887:            (
   888:               (* only inline a recursive call to a child *)
   889:               not (Flx_call.is_recursive_call uses caller callee) ||
   890:               is_child child_map caller callee
   891:            )
   892:           then
   893:               begin
   894:                 (*
   895:                 heavily_inline_bbdcl syms (uses,child_map,bbdfns) (callee::excludes) callee;
   896:                 *)
   897:                 if not (inlining_complete bbdfns callee) then print_endline "Inlining isn't complete in callee ..??";
   898: 
   899:                 if inlining_complete bbdfns callee then begin
   900:                   (*
   901:                   print_endline ("INLINE " ^ id ^ "<" ^ si callee ^ ">");
   902:                   print_endline ("Special inline " ^ si caller ^" calls " ^ si callee);
   903:                   *)
   904:                   (* GENERAL CASE -- we need to add a variable *)
   905:                   let urv = !(syms.counter) in incr (syms.counter);
   906:                   (* inline the code, replacing returns with variable inits *)
   907:                   let revariable,xs =
   908:                      inline_function syms (uses,child_map,bbdfns) caller caller_vs callee ts a urv
   909:                   in
   910:                   (*
   911:                   print_endline "Inline body = ";
   912:                   iter (fun exe -> print_endline (string_of_bexe syms.dfns 4 exe)) xs;
   913:                   *)
   914:                   let xs = hic revariable callee xs in
   915:                   match rev xs with
   916:                   (* SPECIAL CASE DETECTOR: if the inlined function
   917:                     terminates with an initialisation of the new variable,
   918:                     ignore the variable and use the value used to initialise
   919:                     it instead. This is sure to be the result of the sole
   920:                     trailing return. If there were another return, a
   921:                     jump to the end of the function would be needed,
   922:                     past this initialisation, which would require a label
   923:                     at the end of the function
   924: 
   925:                     Note this is a bad form of 'apply lifting'.
   926:                     We should be able to inline
   927: 
   928:                     f (g x)
   929: 
   930:                     by inlining g x, and replacing 'return e'
   931:                     with 'v = f e' everywhere. instead we get
   932:                     v = e in various places, then f v.
   933: 
   934:                     To do this right we need to see a double application.
   935:                   *)
   936:                   | [] -> assert false
   937:                   | `BEXE_init (sr,j,e') :: tail ->
   938:                     assert (j==urv);
   939:                     (*
   940:                     print_endline "DETECTED SPECIAL CASE";
   941:                     print_endline "Outputing tail:";
   942:                     iter (fun x -> print_endline (string_of_bexe syms.dfns 0 x)) (rev tail);
   943:                     print_endline ("Expr: " ^ sbe syms.dfns e');
   944:                     *)
   945:                     exes' := tail @ !exes';
   946:                     e'
   947:                   | rxs ->
   948:                     let urvid = "_urv" ^ si urv in
   949:                     add_child child_map caller urv;
   950:                     add_use uses caller urv sr;
   951:                     let entry = `BBDCL_val (caller_vs,t) in
   952:                     Hashtbl.add bbdfns urv (urvid,Some caller,sr,entry);
   953: 
   954:                     exes' := rxs @ !exes';
   955:                     let ts = map (fun (_,i)-> `BTYP_var (i,`BTYP_type 0)) caller_vs in
   956:                     `BEXPR_name (urv,ts),t
   957:                 end
   958:                 else e
   959:               end
   960:           else e
   961:         *)
   962:         | _ -> e
   963:         end
   964:       end else e
   965: 
   966:   | x -> x
   967:   in
   968:    let e = aux e in (* we need left to right evaluation here ..*)
   969:    e,!exes'
   970: 
   971: and heavy_inline_calls
   972:   syms (uses,child_map,bbdfns)
   973:   caller_vs caller excludes exes
   974: =
   975:   (*
   976:   print_endline ("HIC: Input excludes = " ^ catmap "," si excludes);
   977:   *)
   978:   let inline_check caller callee props exes =
   979:     not (mem `NoInline props) &&
   980:     (
   981:         mem `Inline props ||
   982:         length exes <= syms.compiler_options.max_inline_length
   983:     ) &&
   984:     (
   985:       (* only inline a recursive call to a child *)
   986:       not (Flx_call.is_recursive_call uses caller callee) ||
   987:       is_child child_map caller callee
   988:     )
   989:   in
   990:   let specialise_check caller callee ts props exes = false
   991:     (*
   992:     (* for the moment, don't specialise recursive calls *)
   993:     ts <> [] &&
   994:     not (Flx_call.is_recursive_call uses caller callee)
   995:     *)
   996:   in
   997:   let hic revariable callee exes = if false then exes else
   998:     (*
   999:     print_endline "Rescanning ..";
  1000:     *)
  1001:     let excludes = fold_left
  1002:     (fun acc i ->
  1003:       i :: (try [Hashtbl.find revariable i] with Not_found -> []) @ acc
  1004:     )
  1005:     []
  1006:     (callee::excludes)
  1007:     in
  1008:     heavy_inline_calls syms (uses,child_map,bbdfns)
  1009:     caller_vs caller excludes exes
  1010:   in
  1011: 
  1012:   (* The function ee applies the special inlining routine
  1013:     to all subexpressions of an expression, bottom up
  1014:     (that is, inside out).
  1015:   *)
  1016: 
  1017:   let sinl sr e = special_inline syms (uses,child_map,bbdfns) caller_vs caller hic (caller::excludes) sr e in
  1018: 
  1019:   let ee exe = expand_exe syms bbdfns sinl exe in
  1020:   let exes' = ref [] in (* reverse order *)
  1021:   iter  (* each exe *)
  1022:   (fun exeIN ->
  1023:     (*
  1024:     print_endline ("EXE[in] =" ^ string_of_bexe syms.dfns 0 exeIN);
  1025:     *)
  1026:     let xs = ee exeIN in
  1027:     (*
  1028:     iter (fun x -> print_endline ("EXE[out]=" ^ string_of_bexe syms.dfns 0 x)) xs;
  1029:     print_endline "--";
  1030:     *)
  1031:     (*
  1032:       This code RESCANS the result of the special inliner.
  1033:       The special inliner only handles function applications,
  1034:       this code should NOT handle them because iteration might
  1035:       lead to infinite recurse ..??
  1036: 
  1037:       This means the 'special cases' handled must be
  1038:       disjoint.
  1039: 
  1040:       Unfortunately, when inlining a function, we first
  1041:       inline into the function, then dump the result and
  1042:       rescan it. Consequently the recursion stop applied
  1043:       which leaves a direct non-tail self call will be
  1044:       rescanned here, and the function will be unfolded
  1045:       again .. in that process we also redo the special
  1046:       inlining .. infinite recursion. This is stopped
  1047:       by the flag which prevents inlining into a function
  1048:       more than once .. but that doesn't work if the
  1049:       function is cloned.
  1050:     *)
  1051:     iter (fun exe ->
  1052:     match exe with
  1053:     | `BEXE_call (sr,(`BEXPR_closure(callee,ts),clt),argument)
  1054:     (*
  1055:     | `BEXE_call_direct (sr,callee,ts,argument)
  1056:     *)
  1057:       when not (mem callee excludes)
  1058:       ->
  1059:       let can_inline,callee,ts = virtual_check syms bbdfns sr callee ts in
  1060:       heavily_inline_bbdcl syms (uses,child_map,bbdfns) (callee::excludes) callee;
  1061:       let id,parent,callee_sr,entry = Hashtbl.find bbdfns callee in
  1062:       (*
  1063:       print_endline ("CALL DIRECT " ^ id ^ "<"^ si callee^">");
  1064:       *)
  1065:       begin match entry with
  1066:       | `BBDCL_procedure (props,vs,(ps,traint),exes) ->
  1067:         if can_inline && inline_check caller callee props exes then
  1068:         begin
  1069:           if syms.compiler_options.print_flag then
  1070:           print_endline ("inlining direct call: " ^ string_of_bexe syms.dfns 0 exe);
  1071:           let revariable,xs =
  1072:             heavy_inline_call syms (uses,child_map,bbdfns)
  1073:             caller caller_vs callee ts argument id sr (props,vs,(ps,traint),exes)
  1074:           in
  1075:           let xs = hic revariable callee xs in
  1076:           exes' := rev xs @ !exes'
  1077:         end
  1078:         else
  1079:           exes' := exe :: !exes'
  1080: 
  1081:       | _ ->  exes' := exe :: !exes'
  1082:       end
  1083: 
  1084:     | `BEXE_call (sr,(`BEXPR_apply_stack (callee,ts,a),_),argument)
  1085:     | `BEXE_call (sr,(`BEXPR_apply_prim (callee,ts,a),_),argument)
  1086:     | `BEXE_call (sr,(`BEXPR_apply_direct (callee,ts,a),_),argument)
  1087:       -> assert false
  1088: 
  1089:     | `BEXE_call (sr,(`BEXPR_apply((`BEXPR_closure (callee,ts),_),a),_),argument)
  1090:       when not (mem callee excludes)
  1091:       ->
  1092:       (*
  1093:       print_endline "DETECTED CANDIDATE FOR CALL LIFTING ";
  1094:       print_endline ("In procedure " ^ si caller ^ " with vs=" ^ string_of_vs caller_vs);
  1095:       *)
  1096:       (*
  1097:       print_endline ("handling call lift: " ^ string_of_bexe syms.dfns 0 exe);
  1098:       print_endline ("Callee is " ^ si callee ^ " with ts = " ^ catmap "," (sbt syms.dfns) ts);
  1099:       *)
  1100:       let can_inline,callee,ts = virtual_check syms bbdfns sr callee ts in
  1101:       heavily_inline_bbdcl syms (uses,child_map,bbdfns) (callee::excludes) callee;
  1102:       let id,parent,callee_sr,entry = Hashtbl.find bbdfns callee in
  1103:       begin match entry with
  1104:       | `BBDCL_function (props,vs,(ps,traint),ret,exes) ->
  1105:         if can_inline && inline_check caller callee props exes then
  1106:         begin
  1107:           if syms.compiler_options.print_flag then
  1108:           print_endline ("Inline call lift: " ^ string_of_bexe syms.dfns 0 exe);
  1109:           let revariable,xs =
  1110:             call_lifting syms (uses,child_map,bbdfns) caller caller_vs callee ts a argument
  1111:           in
  1112:           let xs = hic revariable callee xs in
  1113:           exes' := rev xs @ !exes'
  1114:         end else
  1115:           exes' := exe :: !exes'
  1116:       | _ -> exes' := exe :: !exes'
  1117:       end
  1118: 
  1119:     | `BEXE_init (sr,i,(`BEXPR_apply_stack (callee,ts,a),_))
  1120:     | `BEXE_init (sr,i,(`BEXPR_apply_prim (callee,ts,a),_))
  1121:     | `BEXE_init (sr,i,(`BEXPR_apply_direct (callee,ts,a),_))
  1122:       -> assert false
  1123: 
  1124:     | `BEXE_init (sr,i,(`BEXPR_apply ((`BEXPR_closure(callee,ts),_),a),_))
  1125:       when not (mem callee excludes)  ->
  1126:       let can_inline,callee,ts = virtual_check syms bbdfns sr callee ts in
  1127:       heavily_inline_bbdcl syms (uses,child_map,bbdfns) (callee::excludes) callee;
  1128:       let id,parent,callee_sr,entry = Hashtbl.find bbdfns callee in
  1129:       begin match entry with
  1130:       | `BBDCL_function (props,vs,(ps,traint),ret,exes) ->
  1131:         if can_inline && inline_check caller callee props exes then
  1132:           begin
  1133:             let vid,vparent,vsr,ventry = Hashtbl.find bbdfns i in
  1134:             begin match ventry with
  1135:             | `BBDCL_tmp (vs,t) ->
  1136:               (*
  1137:               print_endline ("Downgrading temporary .." ^ si i);
  1138:               *)
  1139:               (* should this be a VAR or a VAL? *)
  1140:               Hashtbl.replace bbdfns i (vid,vparent,vsr,`BBDCL_var (vs,t))
  1141:             | _ -> ()
  1142:             end;
  1143:             if syms.compiler_options.print_flag then
  1144:             print_endline ("Inline init: " ^ string_of_bexe syms.dfns 0 exe);
  1145:             let revariable,xs =
  1146:               inline_function syms (uses,child_map,bbdfns) caller caller_vs callee ts a i
  1147:             in
  1148:             let xs = hic revariable callee xs in
  1149:             exes' := rev xs @ !exes'
  1150:           end
  1151:         else
  1152:           exes' := exe :: !exes'
  1153:       | _ -> exes' := exe :: !exes'
  1154:       end
  1155: 
  1156:     | `BEXE_fun_return (sr,(`BEXPR_apply_direct (callee,ts,a),_))
  1157:     | `BEXE_fun_return (sr,(`BEXPR_apply_stack (callee,ts,a),_))
  1158:     | `BEXE_fun_return (sr,(`BEXPR_apply_prim (callee,ts,a),_))
  1159:      -> assert false
  1160: 
  1161:     | `BEXE_fun_return (sr,(`BEXPR_apply((`BEXPR_closure(callee,ts),_),a),_))
  1162:       when not (mem callee excludes)  ->
  1163:       let can_inline,callee,ts = virtual_check syms bbdfns sr callee ts in
  1164:       heavily_inline_bbdcl syms (uses,child_map,bbdfns) (callee::excludes) callee;
  1165:       let id,parent,callee_sr,entry = Hashtbl.find bbdfns callee in
  1166:       begin match entry with
  1167:       | `BBDCL_function (props,vs,(ps,traint),ret,exes) ->
  1168:         if can_inline && inline_check caller callee props exes then
  1169:         begin
  1170:           if inlining_complete bbdfns callee then
  1171:           begin
  1172:             if syms.compiler_options.print_flag then
  1173:             print_endline ("Inline tail apply : " ^ string_of_bexe syms.dfns 0 exe);
  1174:             let revariable,xs =
  1175:               inline_tail_apply syms (uses,child_map,bbdfns) caller caller_vs callee ts a
  1176:             in
  1177:             let xs = hic revariable callee xs in
  1178:             exes' := rev xs @ !exes'
  1179:           end else
  1180:             exes' := exe :: !exes'
  1181:         end else
  1182:           exes' := exe :: !exes'
  1183:       | _ ->
  1184:         exes' := exe :: !exes'
  1185:       end
  1186:     | _ -> exes' := exe :: !exes'
  1187:     )
  1188:     xs
  1189:   )
  1190:   exes
  1191:   ;
  1192:   rev !exes'
  1193: 
  1194: and remove_unused_children syms (uses,child_map,bbdfns) i =
  1195:   let desc = descendants child_map i in
  1196:   if desc <> IntSet.empty then begin
  1197:     (* all the descendants of a routine, excluding self *)
  1198:     (*
  1199:     print_endline "CANDIDATE FOR CHILD REMOVAL";
  1200:     print_function syms.dfns bbdfns i;
  1201:     print_endline ("Descendants of " ^ si i ^ " =" ^ IntSet.fold (fun j s -> s ^ " " ^ si j) desc "");
  1202:     IntSet.iter (fun i-> print_function syms.dfns bbdfns i) desc;
  1203:     *)
  1204: 
  1205: 
  1206:     (* everything used by this routine directly or indirectly *)
  1207:     let used = Flx_call.use_closure uses i in
  1208: 
  1209:     (*
  1210:     print_endline ("Usage closure of " ^ si i ^ " =" ^ IntSet.fold (fun j s -> s ^ " " ^ si j) used "");
  1211:     *)
  1212:     (* any desendants not used by this routine *)
  1213:     let unused_descendants = IntSet.diff desc used in
  1214: 
  1215:     (* remove the item *)
  1216:     IntSet.iter
  1217:     (fun i ->
  1218:       begin
  1219:         try
  1220:           (* any parent disowns the child *)
  1221:           match Hashtbl.find bbdfns i with
  1222:           | _,Some parent,_,_ -> remove_child child_map parent i
  1223:           | _ -> ()
  1224:         with Not_found -> ()
  1225:       end
  1226:       ;
  1227: 
  1228:       (* remove from symbol table, child map, and usage map *)
  1229:       Hashtbl.remove bbdfns i;
  1230:       Hashtbl.remove child_map i;
  1231:       Hashtbl.remove uses i;
  1232:       if syms.compiler_options.print_flag then
  1233:         print_endline ("REMOVED CHILD SYMBOL " ^ qualified_name_of_index syms.dfns i)
  1234:     )
  1235:     unused_descendants
  1236:   end
  1237: 
  1238: and heavily_inline_bbdcl syms (uses,child_map,bbdfns) excludes i =
  1239:   let specs =
  1240:     try Some (Hashtbl.find bbdfns i)
  1241:     with Not_found -> None
  1242:   in
  1243:   match specs with None -> () | Some spec ->
  1244:   match spec with
  1245:   | id,parent,sr,`BBDCL_procedure (props,vs,(ps,traint),exes) ->
  1246:     (*
  1247:     print_endline ("HIB: consider procedure " ^ id ^ "<"^ si i ^ "> for inlinable calls");
  1248:     *)
  1249:     if not (mem `Inlining_started props) then begin
  1250:       let props = `Inlining_started :: props in
  1251:       let data = id,parent,sr,`BBDCL_procedure (props,vs,(ps,traint),exes) in
  1252:       Hashtbl.replace bbdfns i data;
  1253: 
  1254:       (* inline into all children first *)
  1255:       let children = find_children child_map i in
  1256:       iter (fun i-> heavily_inline_bbdcl syms (uses, child_map, bbdfns) excludes i) children;
  1257: 
  1258:       let xcls = Flx_tailit.exes_get_xclosures syms exes in
  1259:       IntSet.iter (fun i-> heavily_inline_bbdcl syms (uses, child_map, bbdfns) excludes i) xcls;
  1260: 
  1261:       if syms.compiler_options.print_flag then
  1262:       print_endline ("HIB: Examining procedure " ^ id ^ "<"^ si i ^ "> for inlinable calls");
  1263:       (*
  1264:       print_endline ("Input:\n" ^ catmap "\n" (string_of_bexe syms.dfns 0) exes);
  1265:       *)
  1266:       recal_exes_usage syms uses sr i ps exes;
  1267:       let exes = fold_vars syms (uses,child_map,bbdfns) i ps exes in
  1268:       recal_exes_usage syms uses sr i ps exes;
  1269:       (*
  1270:       print_endline (id ^ " Before inlining calls:\n" ^ catmap "\n" (string_of_bexe syms.dfns 0) exes);
  1271:       *)
  1272:       let exes = heavy_inline_calls syms (uses,child_map,bbdfns) vs i excludes exes in
  1273:       (*
  1274:       print_endline (id ^ " After inlining calls:\n" ^ catmap "\n" (string_of_bexe syms.dfns 0) exes);
  1275:       *)
  1276:       recal_exes_usage syms uses sr i ps exes;
  1277:       let exes = Flx_tailit.tailit syms (uses,child_map,bbdfns) i sr ps vs exes in
  1278:       (*
  1279:       print_endline (id ^ " After tailing:\n" ^ catmap "\n" (string_of_bexe syms.dfns 0) exes);
  1280:       *)
  1281:       let exes = check_reductions syms exes in
  1282:       recal_exes_usage syms uses sr i ps exes;
  1283:       let exes = fold_vars syms (uses,child_map,bbdfns) i ps exes in
  1284:       recal_exes_usage syms uses sr i ps exes;
  1285:       let exes = check_reductions syms exes in
  1286:       let exes = Flx_cflow.chain_gotos syms exes in
  1287:       let props = `Inlining_complete :: props in
  1288:       let data = id,parent,sr,`BBDCL_procedure (props,vs,(ps,traint),exes) in
  1289:       Hashtbl.replace bbdfns i data;
  1290:       recal_exes_usage syms uses sr i ps exes;
  1291:       remove_unused_children syms (uses,child_map,bbdfns) i;
  1292:       (*
  1293:       print_endline ("DONE Examining procedure " ^ id ^ "<"^ si i ^ "> for inlinable calls");
  1294:       print_endline ("OPTIMISED PROCEDURE BODY: " ^ id ^ " :\n" ^ catmap "\n" (string_of_bexe syms.dfns 2) exes);
  1295:       *)
  1296:     end
  1297: 
  1298:   | id,parent,sr,`BBDCL_function (props,vs,(ps,traint),ret,exes) ->
  1299:     if not (mem `Inlining_started props) then begin
  1300:       let props = `Inlining_started :: props in
  1301:       let data = id,parent,sr,`BBDCL_function (props,vs,(ps,traint),ret,exes) in
  1302:       Hashtbl.replace bbdfns i data;
  1303: 
  1304:       (* inline into all children first *)
  1305:       let children = find_children child_map i in
  1306:       iter (fun i-> heavily_inline_bbdcl syms (uses, child_map, bbdfns) excludes i) children;
  1307: 
  1308:       let xcls = Flx_tailit.exes_get_xclosures syms exes in
  1309:       IntSet.iter (fun i-> heavily_inline_bbdcl syms (uses, child_map, bbdfns) excludes i) xcls;
  1310: 
  1311:       if syms.compiler_options.print_flag then
  1312:       print_endline ("HIB:Examining function " ^ id ^"<" ^ si i ^ "> for inlinable calls");
  1313:       (*
  1314:       print_endline (id ^ " Input:\n" ^ catmap "\n" (string_of_bexe syms.dfns 0) exes);
  1315:       *)
  1316:       recal_exes_usage syms uses sr i ps exes;
  1317:       let exes = fold_vars syms (uses,child_map,bbdfns) i ps exes in
  1318:       recal_exes_usage syms uses sr i ps exes;
  1319:       let exes = heavy_inline_calls syms (uses,child_map,bbdfns) vs i excludes exes in
  1320:       (*
  1321:       print_endline (id ^ " After inlining calls:\n" ^ catmap "\n" (string_of_bexe syms.dfns 0) exes);
  1322:       *)
  1323:       (*
  1324:       print_endline ("Tailing " ^ si i);
  1325:       *)
  1326:       recal_exes_usage syms uses sr i ps exes;
  1327:       let exes = Flx_tailit.tailit syms (uses,child_map,bbdfns) i sr ps vs exes in
  1328:       (*
  1329:       print_endline (id^ " After tailing(2):\n" ^ catmap "\n" (string_of_bexe syms.dfns 0) exes);
  1330:       *)
  1331:       let exes = check_reductions syms exes in
  1332:       recal_exes_usage syms uses sr i ps exes;
  1333:       let exes = fold_vars syms (uses,child_map,bbdfns) i ps exes in
  1334:       recal_exes_usage syms uses sr i ps exes;
  1335:       let exes = check_reductions syms exes in
  1336:       let exes = Flx_cflow.chain_gotos syms exes in
  1337:       let props = `Inlining_complete :: props in
  1338:       let data = id,parent,sr,`BBDCL_function (props,vs,(ps,traint),ret,exes) in
  1339:       Hashtbl.replace bbdfns i data;
  1340:       recal_exes_usage syms uses sr i ps exes;
  1341:       remove_unused_children syms (uses,child_map,bbdfns) i;
  1342:       (*
  1343:       print_endline ("DONE Examining function " ^ id ^"<" ^ si i ^ "> for inlinable calls");
  1344:       print_endline ("OPTIMISED FUNCTION BODY: " ^ id ^ " :\n" ^ catmap "\n" (string_of_bexe syms.dfns 2) exes);
  1345:       *)
  1346:     end
  1347:   | _ -> ()
  1348: 
  1349: let heavy_inlining syms
  1350:   (child_map,bbdfns)
  1351: =
  1352:   let used = ref (!(syms.roots)) in
  1353:   let (uses,usedby) = Flx_call.call_data syms bbdfns in
  1354: 
  1355:   while not (IntSet.is_empty !used) do
  1356:     let i = IntSet.choose !used in
  1357:     used := IntSet.remove i !used;
  1358:     heavily_inline_bbdcl syms (uses,child_map,bbdfns) [i] i
  1359:   done;
  1360: 
  1361:   Hashtbl.iter
  1362:     (fun i _ -> try heavily_inline_bbdcl syms (uses,child_map,bbdfns) [i] i with _ -> ())
  1363:   bbdfns
  1364: 
  1365: 
  1366: (* NOTES: this algorithm ONLY WORKS if inlining is attempted
  1367: in the corect order. Attempting to inline into children
  1368: before parents, when they're mutually recursive, spawns
  1369: clones infinitely, because we end up cloning a function
  1370: on the exclusion list, but not adding the clone to it.
  1371: 
  1372: 
  1373: NOTE!!!! THIS SHOULD BE FIXED NOW. WE NO LONGER
  1374: PERMIT INLINING RECURSIVE FUNCTIONS UNLESS THE CALL
  1375: IS TO A CHILD. A CALL TO SELF, PARENT OR SIBLING NEVER
  1376: DOES INLINING .. AND THERE ARE NO OTHER CASES.
  1377: 
  1378: INLINING KIDS IS MANDATORY FOR TAIL RECURSION OPTIMISATION.
  1379: 
  1380: So we end up recursing into the clone, and inlining
  1381: into it, which spawns more clones which are not
  1382: excluded, and haven't been inlined into yet.
  1383: 
  1384: This needs to be fixed so the algorithm is proven
  1385: to terminate and also be complete.
  1386: 
  1387: What we need (and is NOT implemented) is something like this:
  1388: 
  1389: Cloning nested functions is should not be needed in general.
  1390: If we proceed from leaves towards the root, we can eliminate
  1391: from each function any nested children, by simply inlining
  1392: them. So only variable children need cloning.
  1393: 
  1394: Two things stop this working:
  1395: 
  1396: (a) non-inline functions and
  1397: (b) recursion.
  1398: 
  1399: The current algorithm has been hacked to only handle the
  1400: call graph from the roots. It used to consider the useage
  1401: closure, however that started to fail when I added
  1402: 'pre-assigned' slot numbers (AST_index). Doing that meant
  1403: the natural order of the set wasn't a topological sort
  1404: of the parent-child order.
  1405: 
  1406: Unfortunately, the remaining recursive descent doesn't
  1407: proceed into noinline functions. Although these shouldn't
  1408: be inlined into their caller, that doesn't mean functions
  1409: shouldn't be inlined into them. Iterating over the usage
  1410: closure ensured noinline functions would still be inlined
  1411: into.
  1412: 
  1413: Recursive functions are a bit different: they currently
  1414: allow inlining, with a recursion stopper preventing
  1415: infinite recursion.
  1416: 
  1417: Unfortunately with a double nesting like this:
  1418: 
  1419:   fun f() { fun g() { fun h() { f(); } h(); } g(); }
  1420: 
  1421: trying to inline g into f causes h to be cloned.
  1422: But trying to inline f into the clone of h retriggers
  1423: the descent, causing the clone to be recloned, and
  1424: the recursion stopper doesn't prevent this, since it
  1425: isn't the same routine being inlined twice (just a clone
  1426: of it ..)
  1427: 
  1428: The thing is.. we HAVE to inline the original routine
  1429: AND the clone for completeness, since both may be
  1430: called independently, so even if we could clone the
  1431: recursion stoppers, it wouldn't work.
  1432: 
  1433: The only solution I can think of is to guarrantee that
  1434: you can only clone a routine that is inlined into
  1435: already (as fas as possible) so that no attempt will
  1436: be made to inline into the clone either.
  1437: --------------------------------------------------------------
  1438: Hum.... When I inline A -> B -> C -> A (all kid inlines) the
  1439: inline of A into C is done first. This creates clones B' and C'.
  1440: When we rescan the code to be put into C, we would try to
  1441: inline B' into it, and C' into that .. but C' is a cloned sibling
  1442: of C, and not the same function. So we try to inline into C',
  1443: and inlining A is allowed there .. which causes an infinite
  1444: recursion.
  1445: 
  1446: *)
End ocaml section to src/flx_inline.ml[1]