:: Probability on Finite and Discrete Set and Uniform Distribution :: by Hiroyuki Okazaki :: :: Received May 5, 2009 :: Copyright (c) 2009-2012 Association of Mizar Users begin notation let S be set ; let s be FinSequence of S; synonym whole_event s for dom S; end; theorem Th1: :: DIST_1:1 for S being set for s being FinSequence of S holds whole_event = s " S proofend; notation let S be set ; let s be FinSequence of S; let x be set ; synonym event_pick (x,s) for Coim (S,s); end; definition let S be set ; let s be FinSequence of S; let x be set ; :: original:event_pick redefine func event_pick (x,s) -> Event of (whole_event ); correctness coherence event_pick (,x) is Event of (whole_event ); by RELAT_1:132; end; definition let S be finite set ; let s be FinSequence of S; let x be set ; func frequency (x,s) -> Nat equals :: DIST_1:def 1 card (event_pick (x,s)); correctness coherence card (event_pick (x,s)) is Nat; ; end; :: deftheorem defines frequency DIST_1:def_1_:_ for S being finite set for s being FinSequence of S for x being set holds frequency (x,s) = card (event_pick (x,s)); theorem :: DIST_1:2 for S being set for s being FinSequence of S for e being set st e in whole_event holds ex x being Element of S st e in event_pick (x,s) proofend; theorem Th3: :: DIST_1:3 for S being set for s being FinSequence of S holds card (whole_event ) = len s proofend; definition let S be finite set ; let s be FinSequence of S; let x be set ; func FDprobability (x,s) -> Real equals :: DIST_1:def 2 (frequency (x,s)) / (len s); correctness coherence (frequency (x,s)) / (len s) is Real; ; end; :: deftheorem defines FDprobability DIST_1:def_2_:_ for S being finite set for s being FinSequence of S for x being set holds FDprobability (x,s) = (frequency (x,s)) / (len s); theorem :: DIST_1:4 for S being finite set for s being FinSequence of S for x being set holds frequency (x,s) = (len s) * (FDprobability (x,s)) proofend; definition let S be finite set ; let s be FinSequence of S; func FDprobSEQ s -> FinSequence of REAL means :Def3: :: DIST_1:def 3 ( dom it = Seg (card S) & ( for n being Nat st n in dom it holds it . n = FDprobability (((canFS S) . n),s) ) ); existence ex b1 being FinSequence of REAL st ( dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds b1 . n = FDprobability (((canFS S) . n),s) ) ) proofend; uniqueness for b1, b2 being FinSequence of REAL st dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds b1 . n = FDprobability (((canFS S) . n),s) ) & dom b2 = Seg (card S) & ( for n being Nat st n in dom b2 holds b2 . n = FDprobability (((canFS S) . n),s) ) holds b1 = b2 proofend; end; :: deftheorem Def3 defines FDprobSEQ DIST_1:def_3_:_ for S being finite set for s being FinSequence of S for b3 being FinSequence of REAL holds ( b3 = FDprobSEQ s iff ( dom b3 = Seg (card S) & ( for n being Nat st n in dom b3 holds b3 . n = FDprobability (((canFS S) . n),s) ) ) ); theorem :: DIST_1:5 for S being finite set for s being FinSequence of S for x being set holds FDprobability (x,s) = prob (event_pick (x,s)) by Th3; definition let S be finite set ; let s, t be FinSequence of S; preds,t -are_prob_equivalent means :Def4: :: DIST_1:def 4 for x being set holds FDprobability (x,s) = FDprobability (x,t); reflexivity for s being FinSequence of S for x being set holds FDprobability (x,s) = FDprobability (x,s) ; symmetry for s, t being FinSequence of S st ( for x being set holds FDprobability (x,s) = FDprobability (x,t) ) holds for x being set holds FDprobability (x,t) = FDprobability (x,s) ; end; :: deftheorem Def4 defines -are_prob_equivalent DIST_1:def_4_:_ for S being finite set for s, t being FinSequence of S holds ( s,t -are_prob_equivalent iff for x being set holds FDprobability (x,s) = FDprobability (x,t) ); theorem Th6: :: DIST_1:6 for S being finite set for s, t, u being FinSequence of S st s,t -are_prob_equivalent & t,u -are_prob_equivalent holds s,u -are_prob_equivalent proofend; definition let S be finite set ; let s be FinSequence of S; func Finseq-EQclass s -> Subset of (S *) equals :: DIST_1:def 5 { t where t is FinSequence of S : s,t -are_prob_equivalent } ; correctness coherence { t where t is FinSequence of S : s,t -are_prob_equivalent } is Subset of (S *); proofend; end; :: deftheorem defines Finseq-EQclass DIST_1:def_5_:_ for S being finite set for s being FinSequence of S holds Finseq-EQclass s = { t where t is FinSequence of S : s,t -are_prob_equivalent } ; registration let S be finite set ; let s be FinSequence of S; cluster Finseq-EQclass s -> non empty ; coherence not Finseq-EQclass s is empty proofend; end; theorem Th7: :: DIST_1:7 for S being finite set for s, t being FinSequence of S holds ( s,t -are_prob_equivalent iff t in Finseq-EQclass s ) proofend; theorem Th8: :: DIST_1:8 for S being finite set for s being FinSequence of S holds s in Finseq-EQclass s ; theorem Th9: :: DIST_1:9 for S being finite set for s, t being FinSequence of S holds ( s,t -are_prob_equivalent iff Finseq-EQclass s = Finseq-EQclass t ) proofend; definition let S be finite set ; defpred S1[ set ] means ex u being FinSequence of S st $1 = Finseq-EQclass u; func distribution_family S -> Subset-Family of (S *) means :Def6: :: DIST_1:def 6 for A being Subset of (S *) holds ( A in it iff ex s being FinSequence of S st A = Finseq-EQclass s ); existence ex b1 being Subset-Family of (S *) st for A being Subset of (S *) holds ( A in b1 iff ex s being FinSequence of S st A = Finseq-EQclass s ) proofend; uniqueness for b1, b2 being Subset-Family of (S *) st ( for A being Subset of (S *) holds ( A in b1 iff ex s being FinSequence of S st A = Finseq-EQclass s ) ) & ( for A being Subset of (S *) holds ( A in b2 iff ex s being FinSequence of S st A = Finseq-EQclass s ) ) holds b1 = b2 proofend; end; :: deftheorem Def6 defines distribution_family DIST_1:def_6_:_ for S being finite set for b2 being Subset-Family of (S *) holds ( b2 = distribution_family S iff for A being Subset of (S *) holds ( A in b2 iff ex s being FinSequence of S st A = Finseq-EQclass s ) ); registration let S be finite set ; cluster distribution_family S -> non empty ; coherence not distribution_family S is empty proofend; end; theorem Th10: :: DIST_1:10 for S being finite set for s, t being FinSequence of S holds ( s,t -are_prob_equivalent iff FDprobSEQ s = FDprobSEQ t ) proofend; theorem :: DIST_1:11 for S being finite set for s, t being FinSequence of S st t in Finseq-EQclass s holds FDprobSEQ s = FDprobSEQ t proofend; definition let S be finite set ; func GenProbSEQ S -> Function of (distribution_family S),(REAL *) means :Def7: :: DIST_1:def 7 for x being Element of distribution_family S ex s being FinSequence of S st ( s in x & it . x = FDprobSEQ s ); existence ex b1 being Function of (distribution_family S),(REAL *) st for x being Element of distribution_family S ex s being FinSequence of S st ( s in x & b1 . x = FDprobSEQ s ) proofend; uniqueness for b1, b2 being Function of (distribution_family S),(REAL *) st ( for x being Element of distribution_family S ex s being FinSequence of S st ( s in x & b1 . x = FDprobSEQ s ) ) & ( for x being Element of distribution_family S ex s being FinSequence of S st ( s in x & b2 . x = FDprobSEQ s ) ) holds b1 = b2 proofend; end; :: deftheorem Def7 defines GenProbSEQ DIST_1:def_7_:_ for S being finite set for b2 being Function of (distribution_family S),(REAL *) holds ( b2 = GenProbSEQ S iff for x being Element of distribution_family S ex s being FinSequence of S st ( s in x & b2 . x = FDprobSEQ s ) ); theorem Th12: :: DIST_1:12 for S being finite set for s being FinSequence of S holds (GenProbSEQ S) . (Finseq-EQclass s) = FDprobSEQ s proofend; registration let S be finite set ; cluster GenProbSEQ S -> one-to-one ; coherence GenProbSEQ S is one-to-one proofend; end; definition let S be finite set ; let p be ProbFinS FinSequence of REAL ; assume A1: ex s being FinSequence of S st FDprobSEQ s = p ; func distribution (p,S) -> Element of distribution_family S means :Def8: :: DIST_1:def 8 (GenProbSEQ S) . it = p; existence ex b1 being Element of distribution_family S st (GenProbSEQ S) . b1 = p proofend; uniqueness for b1, b2 being Element of distribution_family S st (GenProbSEQ S) . b1 = p & (GenProbSEQ S) . b2 = p holds b1 = b2 proofend; end; :: deftheorem Def8 defines distribution DIST_1:def_8_:_ for S being finite set for p being ProbFinS FinSequence of REAL st ex s being FinSequence of S st FDprobSEQ s = p holds for b3 being Element of distribution_family S holds ( b3 = distribution (p,S) iff (GenProbSEQ S) . b3 = p ); definition let S be finite set ; let s be FinSequence of S; func freqSEQ s -> FinSequence of NAT means :Def9: :: DIST_1:def 9 ( dom it = Seg (card S) & ( for n being Nat st n in dom it holds it . n = (len s) * ((FDprobSEQ s) . n) ) ); existence ex b1 being FinSequence of NAT st ( dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds b1 . n = (len s) * ((FDprobSEQ s) . n) ) ) proofend; uniqueness for b1, b2 being FinSequence of NAT st dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds b1 . n = (len s) * ((FDprobSEQ s) . n) ) & dom b2 = Seg (card S) & ( for n being Nat st n in dom b2 holds b2 . n = (len s) * ((FDprobSEQ s) . n) ) holds b1 = b2 proofend; end; :: deftheorem Def9 defines freqSEQ DIST_1:def_9_:_ for S being finite set for s being FinSequence of S for b3 being FinSequence of NAT holds ( b3 = freqSEQ s iff ( dom b3 = Seg (card S) & ( for n being Nat st n in dom b3 holds b3 . n = (len s) * ((FDprobSEQ s) . n) ) ) ); theorem Th13: :: DIST_1:13 for S being non empty finite set for s being non empty FinSequence of S for n being Nat st n in Seg (card S) holds ex x being Element of S st ( (freqSEQ s) . n = frequency (x,s) & x = (canFS S) . n ) proofend; theorem Th14: :: DIST_1:14 for S being finite set for s being FinSequence of S holds freqSEQ s = (len s) * (FDprobSEQ s) proofend; theorem Th15: :: DIST_1:15 for S being finite set for s being FinSequence of S holds Sum (freqSEQ s) = (len s) * (Sum (FDprobSEQ s)) proofend; theorem :: DIST_1:16 for S being non empty finite set for s being non empty FinSequence of S for n being Nat st n in dom s holds ex m being Nat st ( (freqSEQ s) . m = frequency ((s . n),s) & s . n = (canFS S) . m ) proofend; Lem1: for S being Function for X being set st S is disjoint_valued holds S | X is disjoint_valued proofend; Th17: for n being Nat for S being Function for L being FinSequence of NAT st S is disjoint_valued & dom S = dom L & n = len L & ( for i being Nat st i in dom S holds ( S . i is finite & L . i = card (S . i) ) ) holds ( Union S is finite & card (Union S) = Sum L ) proofend; theorem Th18: :: DIST_1:17 for S being Function for L being FinSequence of NAT st S is disjoint_valued & dom S = dom L & ( for i being Nat st i in dom S holds ( S . i is finite & L . i = card (S . i) ) ) holds ( Union S is finite & card (Union S) = Sum L ) proofend; theorem Th19: :: DIST_1:18 for S being non empty finite set for s being non empty FinSequence of S holds Sum (freqSEQ s) = len s proofend; theorem Th20: :: DIST_1:19 for S being non empty finite set for s being non empty FinSequence of S holds Sum (FDprobSEQ s) = 1 proofend; registration let S be non empty finite set ; let s be non empty FinSequence of S; cluster FDprobSEQ s -> ProbFinS ; coherence FDprobSEQ s is ProbFinS proofend; end; definition let S be non empty finite set ; mode distProbFinS of S -> ProbFinS FinSequence of REAL means :Def10: :: DIST_1:def 10 ( len it = card S & ex s being FinSequence of S st FDprobSEQ s = it ); existence ex b1 being ProbFinS FinSequence of REAL st ( len b1 = card S & ex s being FinSequence of S st FDprobSEQ s = b1 ) proofend; end; :: deftheorem Def10 defines distProbFinS DIST_1:def_10_:_ for S being non empty finite set for b2 being ProbFinS FinSequence of REAL holds ( b2 is distProbFinS of S iff ( len b2 = card S & ex s being FinSequence of S st FDprobSEQ s = b2 ) ); theorem Th22: :: DIST_1:20 for S being non empty finite set for p being distProbFinS of S holds ( distribution (p,S) is Element of distribution_family S & (GenProbSEQ S) . (distribution (p,S)) = p ) proofend; begin definition let S be finite set ; let s be FinSequence of S; attrs is uniformly_distributed means :Def11: :: DIST_1:def 11 for n being Nat st n in dom (FDprobSEQ s) holds (FDprobSEQ s) . n = 1 / (card S); end; :: deftheorem Def11 defines uniformly_distributed DIST_1:def_11_:_ for S being finite set for s being FinSequence of S holds ( s is uniformly_distributed iff for n being Nat st n in dom (FDprobSEQ s) holds (FDprobSEQ s) . n = 1 / (card S) ); theorem Th23: :: DIST_1:21 for S being finite set for s being FinSequence of S st s is uniformly_distributed holds FDprobSEQ s is constant proofend; theorem Th24: :: DIST_1:22 for S being finite set for s, t being FinSequence of S st s is uniformly_distributed & s,t -are_prob_equivalent holds t is uniformly_distributed proofend; theorem Th25: :: DIST_1:23 for S being finite set for s, t being FinSequence of S st s is uniformly_distributed & t is uniformly_distributed holds s,t -are_prob_equivalent proofend; registration let S be finite set ; cluster canFS S -> uniformly_distributed ; coherence canFS S is uniformly_distributed proofend; end; Lm2: for S being finite set for s being FinSequence of S st s in Finseq-EQclass (canFS S) holds s is uniformly_distributed proofend; Lm3: for S being finite set for s being FinSequence of S st s is uniformly_distributed holds for t being FinSequence of S st t is uniformly_distributed holds t in Finseq-EQclass s proofend; definition let S be finite set ; func uniform_distribution S -> Element of distribution_family S means :Def12: :: DIST_1:def 12 for s being FinSequence of S holds ( s in it iff s is uniformly_distributed ); existence ex b1 being Element of distribution_family S st for s being FinSequence of S holds ( s in b1 iff s is uniformly_distributed ) proofend; uniqueness for b1, b2 being Element of distribution_family S st ( for s being FinSequence of S holds ( s in b1 iff s is uniformly_distributed ) ) & ( for s being FinSequence of S holds ( s in b2 iff s is uniformly_distributed ) ) holds b1 = b2 proofend; end; :: deftheorem Def12 defines uniform_distribution DIST_1:def_12_:_ for S being finite set for b2 being Element of distribution_family S holds ( b2 = uniform_distribution S iff for s being FinSequence of S holds ( s in b2 iff s is uniformly_distributed ) ); registration let S be non empty finite set ; cluster non empty Relation-like NAT -defined REAL -valued Function-like constant finite V40() V41() V42() FinSequence-like FinSubsequence-like V70() ProbFinS for distProbFinS of S; existence ex b1 being distProbFinS of S st b1 is constant proofend; end; definition let S be non empty finite set ; func Uniform_FDprobSEQ S -> distProbFinS of S equals :: DIST_1:def 13 FDprobSEQ (canFS S); coherence FDprobSEQ (canFS S) is distProbFinS of S proofend; end; :: deftheorem defines Uniform_FDprobSEQ DIST_1:def_13_:_ for S being non empty finite set holds Uniform_FDprobSEQ S = FDprobSEQ (canFS S); registration let S be non empty finite set ; cluster Uniform_FDprobSEQ S -> constant ; coherence Uniform_FDprobSEQ S is constant by Th23; end; theorem :: DIST_1:24 for S being non empty finite set holds uniform_distribution S = distribution ((Uniform_FDprobSEQ S),S) proofend;