(*-------------------------------------------------------------------------- *)
(* Organize the libraries by theorems, types and constants. Give also        *)
(* additional information about the theorems such as their dependencies and  *)
(* originated theories (deph), their role (roleh) and the order (thmlo) in   *)
(* which the theorems were proved inside their theory.                       *)
(*-------------------------------------------------------------------------- *)

open Hh_parse
open Hh_term
open Toolbox
open Preprocess

let split_flag = ref false 
let fof_flag = ref false
let thf_flag = ref false
let lisp_flag = ref false
let arity_flag = ref false
let smaller_thm_flag = ref false
let big_thml_glob = ref []

let max_constants = 40000
let max_thms = 100000

let rec hh_to_tt formula = match formula with
  | Id v -> v
  | Comb(Id "!", Abs (v,ty,tm)) -> 
    "(" ^ "! " ^ "[" ^ v ^ ": " ^ hh_to_tt ty ^ "] : " ^ hh_to_tt tm ^ ")"
  | Comb(Id "?", Abs (v,ty,tm)) -> 
    "(" ^ "? " ^ "[" ^ v ^ ": " ^ hh_to_tt ty ^ "] : " ^ hh_to_tt tm ^ ")"
  | Comb(Comb(Id("&"),f1),f2) -> 
    "(" ^ hh_to_tt f1 ^ " & " ^ hh_to_tt f2 ^ ")"
  | Comb(Comb(Id("|"),f1),f2) -> 
    "(" ^ hh_to_tt f1 ^ " | " ^ hh_to_tt f2 ^ ")"
  | Comb(Comb(Id("=>"),f1),f2) -> 
    "(" ^ hh_to_tt f1 ^ " => " ^ hh_to_tt f2 ^ ")"
  | Comb(Id("~"),f1) -> 
    "(~ " ^ hh_to_tt f1 ^ ")"
  | Comb(Comb(Id("="),f1),f2) -> 
    "(" ^ hh_to_tt f1 ^ " = " ^ hh_to_tt f2 ^ ")"
  | Comb(Comb(Id("<=>"),f1),f2) -> 
    "(" ^ hh_to_tt f1 ^ " <=> " ^ hh_to_tt f2 ^ ")"
  | Abs (v,ty,tm) -> 
    "(" ^ "^ " ^ "[" ^ v ^ ": " ^ hh_to_tt ty ^ "] : " ^ hh_to_tt tm ^ ")"
  | Comb (x,y)    -> "(" ^ hh_to_tt x ^  " " ^ hh_to_tt y ^ ")"

(*-------------------------------------------------------------------------- 
  Tmll manipulation
  -------------------------------------------------------------------------- *)

let drop_tt (_,s,r,t) = (s,r,t)
let drop_role (s,r,t) = (s,t)
let drop_term (s,r,t) = (s,r)
let obtain_name (s,r,t) = s

let flatten_tmll l = List.concat (List.map snd l)
let foreach_filter f tmll =
  List.map (fun (thy,l) -> (thy,List.filter f l)) tmll
let foreach_map f tmll = 
  List.map (fun (thy,l) -> (thy,List.map f l)) tmll

let foreach_concat_map f tmll = 
  let g l = List.concat (List.map f l) in
  List.map (fun (thy,l) -> (thy, g l)) tmll

let foreach_iter f tmll = 
  List.iter (fun (_,l) -> List.iter f l) tmll

(*-------------------------------------------------------------------------- 
  Used?
  -------------------------------------------------------------------------- *)

let rec cl_of_hh_aux vl tm = match tm with
  | Id x -> if List.mem x vl then [] else [x]
  | Comb(x,y) -> cl_of_hh_aux vl x @ cl_of_hh_aux vl y
  | Abs(v,ty,tm) -> cl_of_hh_aux (v :: vl) ty @ cl_of_hh_aux (v :: vl) tm

let cl_of_hh tm = mk_fast_set compare (cl_of_hh_aux [] tm)

(*-------------------------------------------------------------------------- 
  Object partition
  -------------------------------------------------------------------------- *)

let rec is_type_aux = function
  | Id "$t" -> true
  | Id "$tType" -> true
  | Comb (Comb (Id ">", Id "$t"), x) -> is_type_aux x
  | Comb (Comb (Id ">", Id "$tType"), x) -> is_type_aux x
  | Comb (Comb (Id "=>", Id "$t"), x) -> is_type_aux x
  | Comb (Comb (Id "=>", Id "$tType"), x) -> is_type_aux x
  | _ -> false;;

let is_type = function
  | Comb (Comb (Id ":", x), y) -> is_type_aux y
  | y -> is_type_aux y;;

let rec strip_comb formula = match formula with
  | Comb (f1,f2) -> let (oper,argl) = strip_comb f1 in 
                           (oper, (argl @ [f2])) (* could be optimized *)
  | _          -> (formula,[])

let rec list_mk_comb (oper,argl) = match argl with
  | [] -> oper
  | a :: m -> list_mk_comb (Comb (oper,a), m)

let append_prime x y = add_prime (remove_prime x ^ y)

(*-------------------------------------------------------------------------- 
  Constructors of hh
  -------------------------------------------------------------------------- *)

let rec mk_varl_aux s max = 
  if max > 0 
  then append_prime s (string_of_int (max - 1)) :: mk_varl_aux s (max - 1)
  else []

let rec mk_varl s max = List.rev (mk_varl_aux s max)

let mk_eq (tm1,tm2) = Comb (Comb (Id "=", tm1), tm2)

let rec list_mk_comb (oper,argl) = match argl with
  | [] -> oper
  | arg :: m -> list_mk_comb (Comb(oper,arg), m)

let mk_forall ((n,ty),t) = Comb (Id "!",Abs (n, ty, t))

let rec list_mk_forall (vl,t) = match vl with
  | [] -> t
  | v :: m -> mk_forall (v, list_mk_forall (m,t))


let rec id_replace a b tm = match tm with
  | Id x -> if x = a then Id b else tm
  | Comb(x,y) -> Comb(id_replace a b x, id_replace a b y)
  | Abs(v,ty,tm) -> Abs(begin if v = a then b else v end, 
                        id_replace a b ty, id_replace a b tm)


let rec strip_type vl ty = 
  if vl = [] then [] else begin
  match ty with
  | Comb (Id "!", Abs (n, ty', tm')) ->
    let newtm' = id_replace n (List.hd vl) tm' in
    (List.hd vl,ty') :: strip_type (List.tl vl) newtm'
  | Comb (Comb (Id ">", ty1), ty2) -> 
    (List.hd vl,ty1) :: strip_type (List.tl vl) ty2 
  | _ -> if List.length vl <> 1 
         then failwith ("strip_type: " ^ String.concat " " vl ^ ": " 
                           ^ hh_to_tt ty)
         else [(List.hd vl, ty)]
  end

let rec strip_type_no_subst ty = 
  match ty with
  | Comb (Id "!", Abs (n, ty', tm')) ->
    ty' :: strip_type_no_subst tm'
  | Comb (Comb (Id ">", ty1), ty2) -> 
    ty1 :: strip_type_no_subst ty2
  | _    -> [ty]

let mk_conj (tm1,tm2) = Comb (Comb (Id "&", tm1), tm2)

let mk_imp (tm1,tm2) = Comb (Comb (Id "=>", tm1), tm2)

let rec list_mk_conj tml = match tml with
  | [] -> failwith "list_mk_conj"
  | [tm] -> tm
  | tm :: m -> mk_conj (tm, list_mk_conj m)



(*-------------------------------------------------------------------------- 
  Parsing names
  -------------------------------------------------------------------------- *)

let rename_obj (s,r,t) = match t with
  | Comb (Comb (Id ":", Id x), y) -> (x,r,y)
  | _ -> (s,r,t)

let is_thm_role role = 
  role = "ax" || role = "def" || role = "axiom" || role = "definition"
let is_thm (_,role,_) = is_thm_role role
let is_tyc (_,role,_) = role = "ty" || role = "type"
let is_ty (_,_,t) = is_type t

let is_thm_cj (_,role,_) =
  role = "ax" || role = "def" || role = "axiom" || role = "definition" ||
  role = "cj" || role = "conjecture"

(* Warning: here all non constants are assumed to be theorems *)
let get_tyl_cl_thml l = 
  let (tycl,thml) = List.partition is_tyc l in 
  let (tyl,cl) = List.partition is_ty tycl in
  let (tyl,cl,thml) = (List.map drop_role tyl,
                       List.map drop_role cl, 
                       List.map drop_role thml) 
  in
  (tyl,cl,thml)

(*-------------------------------------------------------------------------- 
  Ordering constants (obj here but should be changed to c)
  -------------------------------------------------------------------------- *)

let get_cl objh t =
  let rl = ref [] in
  let rec loop t = match t with
  | Id x -> (if Hashtbl.mem objh x then rl := x :: !rl else ())
  | Comb (x, y) -> (loop x; loop y)
  | Abs (v,ty,tm) -> (loop ty; loop tm)
  in
  loop t;
  mk_fast_set compare !rl

let order_objl objl = 
  let objh = hash_of_alist objl in
  let graph = List.map (fun (x,t) -> (x, get_cl objh t)) objl in
  let sortedl = topo_sort graph in
  List.map (fun x -> (x, Hashtbl.find objh x)) sortedl

(*-------------------------------------------------------------------------- 
  Main: initialization
  -------------------------------------------------------------------------- *)

let print_info (dir,tyl,cl,thml,thmlo) =
  log_endline ("Loading objects from " ^ dir);
  log_endline ("- types: " ^ string_of_int (List.length tyl));
  log_endline ("- constants: " ^ string_of_int (List.length cl));
  log_endline ("- theorems: " ^ string_of_int (List.length thml));
  log_endline ("- theories: "^ string_of_int (List.length thmlo) ^ "\n")

let init_dir dir =
  log_endline ("Reading");
  let (deph,tmll) = Read.read_dir dir in
  let tmll = foreach_map drop_tt tmll in
  let tmll = foreach_map rename_obj tmll in
  (* Remove big theorems *)
  let tmll =
    if !smaller_thm_flag
    then 
      begin
      big_thml_glob := [];  
      foreach_filter 
         (fun (s,r,t) -> 
         if (is_thm_cj (s,r,t) && size_of t >= 40000) 
         then (big_thml_glob := s :: !big_thml_glob; false)
         else true
         )
      tmll
      end 
    else tmll
  in
  let deph = 
     if !smaller_thm_flag 
     then 
       begin
       log_endline ("Removing " ^ string_of_int (List.length !big_thml_glob) ^ 
       " big theorems");
       remove_deph !big_thml_glob deph
       end
     else deph
  in
  (* Split theorems (not constants nor types) *)
  let tmll_thm = foreach_filter (is_thm_cj) tmll in
  let tmll_tyc = foreach_filter (not ++ is_thm_cj) tmll in
  let (deph,tmll_thm) = 
    if !split_flag 
    then (log_endline ("Splitting theorems");
          split_lib (deph,tmll_thm))
    else (deph,tmll_thm)
  in
  let merge_tmll tmll1 tmll2 = 
    let thyl = mk_set (List.map fst tmll1 @ List.map fst tmll2) in
    let f thy = 
      (thy, (try List.assoc thy tmll1 with _ -> []) @ 
            (try List.assoc thy tmll2 with _ -> []))
    in
    List.map f thyl
  in
  let tmll = merge_tmll tmll_tyc tmll_thm in
  (* Obtain constants and types *)
  let (tyl,cl,_) = get_tyl_cl_thml (flatten_tmll tmll) in
  (* Tmll only theorems *)
  let tmll = foreach_filter (is_thm_cj) tmll in
  let thml = List.map drop_role (flatten_tmll tmll) in
  let thmlo = foreach_map obtain_name tmll in 
  let roleh = hash_of_alist (List.map drop_term (flatten_tmll tmll)) in
  print_info (dir,tyl,cl,thml,thmlo);
  ((tyl,order_objl cl,thml),(deph,roleh,thmlo))


(* Initialization from a thf file *)
let init_file file =
  let tmll = Read.read_file file in
  let tmll = foreach_map drop_tt tmll in
  let tmll = foreach_map rename_obj tmll in
  let (tyl,cl,thml) = get_tyl_cl_thml (flatten_tmll tmll) in
  let tmll = foreach_filter (is_thm_cj) tmll in
  let thmlo = foreach_map obtain_name tmll in 
  let roleh = hash_of_alist (List.map drop_term (flatten_tmll tmll)) in
  print_info (file,tyl,cl,thml,thmlo);
  ((tyl,order_objl cl,thml),(roleh,thmlo))

(*--------------------------------------------------------------------------
  HH Printer 
  -------------------------------------------------------------------------- *)

let rec hh_to_fof formula = match formula with
  | Id v -> v
  | Comb(Id "!", Abs (v,ty,tm)) -> 
    "(" ^ "! " ^ "[" ^ v ^ "] : " ^ hh_to_fof tm ^ ")"
  | Comb(Id "?", Abs (v,ty,tm)) -> 
    "(" ^ "? " ^ "[" ^ v ^ "] : " ^ hh_to_fof tm ^ ")"
  | Comb(Comb(Id("&"),f1),f2) -> 
    "(" ^ hh_to_fof f1 ^ " & " ^ hh_to_fof f2 ^ ")"
  | Comb(Comb(Id("|"),f1),f2) -> 
    "(" ^ hh_to_fof f1 ^ " | " ^ hh_to_fof f2 ^ ")"
  | Comb(Comb(Id("=>"),f1),f2) -> 
    "(" ^ hh_to_fof f1 ^ " => " ^ hh_to_fof f2 ^ ")"
  | Comb(Id("~"),f1) -> 
    "(~ " ^ hh_to_fof f1 ^ ")"
  | Comb(Comb(Id("="),f1),f2) -> 
    "(" ^ hh_to_fof f1 ^ " = " ^ hh_to_fof f2 ^ ")"
  | Comb(Comb(Id("<=>"),f1),f2) -> 
    "(" ^ hh_to_fof f1 ^ " <=> " ^ hh_to_fof f2 ^ ")"
  | Abs (v,ty,tm) -> "(" ^ "^ " ^ "[" ^ v ^ "] : " ^ hh_to_fof tm ^ ")"
  | Comb (x,y)    -> 
      let (oper, argl) = strip_comb formula in
      hh_to_fof oper ^ "(" ^ String.concat "," (List.map hh_to_fof argl) ^ ")"

(* Warning: doesn't parse in before_tptp *)
let rec hh_to_thf formula = match formula with
  | Id v -> v
  | Comb(Id "!", Abs (v,ty,tm)) -> 
    "(" ^ "! " ^ "[" ^ v ^ ": " ^ hh_to_thf ty ^ "] : " ^ hh_to_thf tm ^ ")"
  | Comb(Id "?", Abs (v,ty,tm)) -> 
    "(" ^ "? " ^ "[" ^ v ^ ": " ^ hh_to_thf ty ^ "] : " ^ hh_to_thf tm ^ ")"
  | Comb(Comb(Id("&"),f1),f2) -> 
    "(" ^ hh_to_thf f1 ^ " & " ^ hh_to_thf f2 ^ ")"
  | Comb(Comb(Id("|"),f1),f2) -> 
    "(" ^ hh_to_thf f1 ^ " | " ^ hh_to_thf f2 ^ ")"
  | Comb(Comb(Id("=>"),f1),f2) -> 
    "(" ^ hh_to_thf f1 ^ " => " ^ hh_to_thf f2 ^ ")"
  | Comb(Id("~"),f1) -> 
    "(~ " ^ hh_to_thf f1 ^ ")"
  | Comb(Comb(Id("="),f1),f2) -> 
    "(" ^ hh_to_thf f1 ^ " = " ^ hh_to_thf f2 ^ ")"
  | Comb(Comb(Id("<=>"),f1),f2) -> 
    "(" ^ hh_to_thf f1 ^ " <=> " ^ hh_to_thf f2 ^ ")"
  | Abs (v,ty,tm) -> 
    "(" ^ "^ " ^ "[" ^ v ^ ": " ^ hh_to_thf ty ^ "] : " ^ hh_to_thf tm ^ ")"
  | Comb (x,y)    -> "(" ^ hh_to_thf x ^  " @ " ^ hh_to_thf y ^ ")"

(* rlwrap ./top -I hh1
Init.init_dir (directory of the file should be .p );;
*)
