----------------------------------------------------------------------------------
-- Company: 
-- Engineer: 
-- 
-- Create Date:    21:20:54 07/16/2012 
-- Design Name: 
-- Module Name:    PE - Behavioral 
-- Project Name: 
-- Target Devices: 
-- Tool versions: 
-- Description: 
--
-- Dependencies: 
--
-- Revision: 
-- Revision 0.01 - File Created
-- Additional Comments: 
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
library NocLib ;
--use IEEE.STD_LOGIC_ARITH.ALL;
--use IEEE.STD_LOGIC_UNSIGNED.ALL;
use NocLib.CoreTypes.all;
use work.Packet_type.all;
use work.MPI_RMA.all;
use IEEE.NUMERIC_STD.ALL;


-- PE: processing element wrapping a dual-port RAM that is shared between the
-- internal PE state machine and an external core. The core asks for the RAM
-- with Core_Hold_req and is granted it through Core_Hold_Ack.
entity PE is
	generic (
		DestId : natural   -- initial value of the internal dpid / dpid_i signals
	);
	port (
		-- Instruction word presented to the core (driven with a constant in this architecture)
		Instruction         : out std_logic_vector(Word-1 downto 0);
		-- Instruction strobe, driven from the PE state machine
		Instruction_en      : out std_logic;
		-- Status bits from the core: bit 0 is read as "instruction acknowledged",
		-- bit 4 as "initialised"
		Core_PushOut        : in  std_logic_vector(Word-1 downto 0);
		clk                 : in  std_logic;
		-- Active-high reset, sampled on the rising edge of clk
		reset               : in  std_logic;
		-- RAM read data towards the core; driven to 'Z' while the PE owns the RAM
		Core_RAM_Data_Out   : out std_logic_vector(Word-1 downto 0);
		-- Core-side RAM write data, write enable and enable (the enable is applied to both RAM ports)
		Core_RAM_Data_In    : in  std_logic_vector(Word-1 downto 0);
		Core_RAM_WE         : in  std_logic;
		Core_RAM_EN         : in  std_logic;
		-- Core-side write and read addresses
		Core_RAM_ADDRESS_WR : in  std_logic_vector(ADRLEN-1 downto 0);
		Core_RAM_ADDRESS_RD : in  std_logic_vector(ADRLEN-1 downto 0);
		-- RAM bus request from the core and the matching grant
		Core_Hold_req       : in  std_logic;
		Core_Hold_Ack       : out std_logic
	);
end entity PE;

architecture Behavioral of PE is
-- Dual-port RAM component. Port A carries the write-side signals (wea, dia),
-- port B the read-side signals (dob). Vector widths are left unconstrained
-- here and are fixed by the actuals at instantiation.
component RAM_v
	generic (
		width : positive;
		size  : positive
	);
	port (
		clka  : in  std_logic;
		clkb  : in  std_logic;
		wea   : in  std_logic;
		ena   : in  std_logic;
		enb   : in  std_logic;
		addra : in  std_logic_vector;
		addrb : in  std_logic_vector;
		dia   : in  std_logic_vector;
		dob   : out std_logic_vector
	);
end component;
-- PE program data and interconnect signals.
-- NOTE(review): the original comment line here had a "signal datain" declaration
-- fused into it, so no "datain" signal has ever been declared by this architecture.

	-- RAM-side signals (outputs of the Ram_mux multiplexer) and the mux select
	signal ram_we       : std_logic := '0';
	signal ram_ena      : std_logic := '0';
	signal ram_enb      : std_logic := '0';
	signal ramsel       : std_logic := '0';   -- '1': the core drives the RAM, otherwise the PE does
	signal ram_do       : std_logic_vector(word-1 downto 0) := (others => '0');
	signal ram_din      : std_logic_vector(word-1 downto 0) := (others => '0');
	signal ram_addra    : std_logic_vector(ADRLEN-1 downto 0);
	signal ram_addrb    : std_logic_vector(ADRLEN-1 downto 0);

	-- PE-side RAM access signals
	signal pe_ram_we    : std_logic;
	signal pe_ram_ena   : std_logic;
	signal pe_ram_enb   : std_logic;
	signal pe_ram_do    : std_logic_vector(word-1 downto 0) := (others => '0');
	signal pe_ram_din   : std_logic_vector(word-1 downto 0) := (others => '0');
	signal pe_ram_addra : std_logic_vector(ADRLEN-1 downto 0);
	signal pe_ram_addrb : std_logic_vector(ADRLEN-1 downto 0);
	signal sram         : typ_dpram;

	-- Instruction strobe and hold acknowledge as seen by the PE
	signal pe_instr_en  : std_logic := '0';
	signal pe_hold_ack  : std_logic := '0';

	-- Operands used when building put/get requests
	signal SrcAdr0      : std_logic_vector(word-1 downto 0);
	signal SrcAdr1      : std_logic_vector(word-1 downto 0);
	signal destAdr0     : std_logic_vector(word-1 downto 0);
	signal destAdr1     : std_logic_vector(word-1 downto 0);
	signal Datalen      : std_logic_vector(word-1 downto 0);
	signal dpid         : natural range 0 to 15 := DestId;
	signal dpid_i       : natural range 0 to 15 := DestId;
	signal MyRank       : std_logic_vector(3 downto 0);

	-- Library interface
	signal Libr          : Core_io;     -- groups all I/O signals of the library
	signal Lib_Ready     : std_logic;   -- function execution has finished
	signal Lib_instr_ack : std_logic;   -- the instruction has been copied into the FIFO buffer
	signal Lib_Init      : std_logic;   -- initialisation has finished

	-- State machine (FSM) management
	type typ_mae is (
		start, Fillmem, NextFill, InitApp, InitCompleted,
		GetRank1, GetRank2, GetRank3, writeptr, InstrCopy,
		putdata, putdata2, putcompleted,
		getdata, getdata2, getcompleted,
		terminate, st_timeout
	);
	signal dcount   : natural range 0 to 255 := 0;   -- counts the data packets sent
	signal count    : natural range 0 to 15 := 0;
	signal count_i  : natural range 0 to 15 := 0;

	signal RunState : typ_mae;
	signal Ram_busy : std_logic := '0';
begin
-- The single dual-port RAM of the PE. Both ports are clocked by clk and every
-- other pin is fed from the Ram_mux outputs.
Inst_RAM_v : RAM_v
	generic map (
		width => word,
		size  => ADRLEN
	)
	port map (
		clka  => clk,
		clkb  => clk,
		wea   => ram_we,
		ena   => ram_ena,
		enb   => ram_enb,
		addra => ram_addra,
		addrb => ram_addrb,
		dia   => ram_din,
		dob   => ram_do
	);
--================================================================
	-- RAM multiplexer: selects whether the core or the PE drives the RAM
                             	
-- Ram_mux: combinational selection of the RAM master.
--   ramsel = '1'      : the core's address/enable/write/data signals reach the RAM
--                       and the RAM read data is returned on Core_ram_data_out.
--   any other value   : the PE's signals reach the RAM and the read data is
--                       returned on pe_ram_do.
-- The read-data path of the master that is not selected is driven to 'Z'.
Ram_mux: process (ramsel,
                  Core_ram_address_wr, Core_ram_address_rd,
                  Core_ram_en, Core_ram_we, Core_ram_data_in,
                  pe_ram_addra, pe_ram_addrb,
                  pe_ram_ena, pe_ram_enb, Pe_ram_we, Pe_ram_din,
                  Ram_do)
begin
	if ramsel = '1' then
		-- Core owns the RAM; its single enable drives both ports.
		ram_we            <= Core_ram_we;
		ram_ena           <= Core_ram_en;
		ram_enb           <= Core_ram_en;
		ram_addra         <= Core_ram_address_wr;
		ram_addrb         <= Core_ram_address_rd;
		ram_din           <= Core_ram_data_in;
		Core_ram_data_out <= ram_do;
		pe_ram_do         <= (others => 'Z');
	else
		-- PE owns the RAM.
		ram_we            <= pe_ram_we;
		ram_ena           <= pe_ram_ena;
		ram_enb           <= pe_ram_enb;
		ram_addra         <= pe_ram_addra;
		ram_addrb         <= pe_ram_addrb;
		ram_din           <= pe_ram_din;
		pe_ram_do         <= ram_do;
		Core_ram_data_out <= (others => 'Z');
	end if;
end process Ram_mux;



-- Instruction strobe towards the core, taken from the FSM register.
-- (Original author's note: to be changed, possibly to Libr.Instr_en.)
Instruction_En <= PE_instr_EN;

-- WARNING (original author): removing the assignment below prevents this
-- component from working correctly.
-- The width now follows the port declaration (Word) instead of a literal 8;
-- the result is identical for Word = 8, which the 8-bit literals used
-- elsewhere in this architecture already require.
instruction <= std_logic_vector(to_unsigned(Core_upper_adr, Word));

dpid <= dpid_i;

Lib_Instr_ack <= Core_Pushout(0);   -- the instruction has been copied
Lib_init      <= Core_Pushout(4);   -- initialised

-- Core_Hold_Ack is not driven here: it is registered by the "hold" process.

 -- hold: registered arbitration of the shared RAM.
 -- While the core requests the RAM (Core_Hold_Req = '1') the grant is the
 -- inverse of ram_busy: the core gets the RAM (ramsel = '1') and both
 -- acknowledges are raised only when the PE does not flag the RAM as busy.
 -- Without a request everything is released.
 -- Compared with the original: the process is purely clocked, so only clk is
 -- in the sensitivity list, and reset now clears all three registers instead
 -- of only Core_Hold_Ack (ramsel and Pe_hold_ack were previously left to follow
 -- Core_Hold_Req during reset).
 hold: process (clk)
 begin
	if rising_edge(clk) then
		if reset = '1' then
			Core_Hold_Ack <= '0';
			Pe_hold_ack   <= '0';
			ramsel        <= '0';
		elsif Core_Hold_Req = '1' then
			ramsel        <= not ram_busy;
			Core_Hold_Ack <= not ram_busy;
			Pe_hold_ack   <= not ram_busy;
		else
			ramsel        <= '0';
			Core_Hold_Ack <= '0';
			Pe_hold_ack   <= '0';
		end if;
	end if;
 end process hold;
--=======================================================================



--=======================================================================
-- PE state machine (FSM)
--=======================================================================

 -- pPutGet: main state machine of the PE (state register RunState).
 --
 -- Flow as coded below:
 --   start -> Fillmem/NextFill : writes a counting pattern into the RAM, first
 --                               block starting at DATAPTR, following blocks at
 --                               100*bfill, until bfill reaches 4
 --   InitApp                   : runs pMPI_Init until it reports ct = 0
 --   GetRank1                  : runs pMPI_Comm_rank until ct = 0
 --   putdata2                  : runs pMPI_put until ct = 0
 --   getdata -> writeptr -> getdata -> getdata2 -> getcompleted -> terminate -> start
 --
 -- NOTE(review): bfill is never cleared, so after the first pass the FSM stays
 -- in start. InstrCopy, InitCompleted, GetRank2, GetRank3, putdata and
 -- putcompleted are not reachable through the transitions coded here; they look
 -- like leftovers of an earlier hand-written sequence - confirm before removing.
 --
 -- The process mixes a combinational prologue (evaluated on every event of the
 -- sensitivity list) with a clocked section, hence the extended sensitivity list.
 --
 -- Changes relative to the original:
 --   * adresse / adresse_rd are limited to 0..65535 so they always fit the
 --     16-bit to_unsigned conversion at the end of the process.
 --   * the timeout counter saturates in getcompleted instead of running past
 --     its 0..255 range.
 --   * the Libr.* / sram.data_out assignments are made once, in the
 --     combinational prologue only; the identical copies inside the clocked
 --     section were redundant.
 --   * duplicate assignments (Dcount in start, RunState in st_timeout) and the
 --     unused variables pid and mport were removed.
 pPutGet: process (clk, Core_Pushout, Core_Hold_req, PE_hold_Ack, RamSel, PE_Ram_do)

	constant DATAPTR : natural := 256;   -- start of the first filled block, also the put source address
	variable bfill, destrank        : natural range 0 to 15;       -- bfill: number of memory blocks filled so far
	variable fsrc, ret              : natural range 0 to 15 := 0;  -- writeptr caller id / id of the last completed writeptr
	variable timeout, ct, dlen      : natural range 0 to 255;
	variable adrToset, SrcAdr, DestAdr : std_logic_vector(ADRLEN-1 downto 0);
	variable iack                   : std_logic := '0';
	variable adresse, adresse_rd    : natural range 0 to 65535;    -- RAM write / read addresses, registered at the end
	variable status_reg, config_reg : std_logic_vector(Word-1 downto 0) := (others => '0');
 begin
	--=== Combinational part of the process ================================
	Libr.Instr_ack <= Core_pushout(0);
	Libr.InitOk    <= Core_pushout(4);
	Libr.Hold_Req  <= Core_Hold_req;
	Libr.Hold_Ack  <= Pe_Hold_Ack;
	Libr.RamSel    <= RamSel;
	sram.data_out  <= PE_ram_do;
	--=== End of the combinational part ====================================

	if (clk'event and clk = '1') then
		if reset = '1' then
			RunState <= start;
		else
			case RunState is

			when start =>
				-- Fill the memory only if no block has been filled yet.
				if bfill = 0 then
					RunState <= Fillmem;
				end if;
				Ram_busy    <= '0';
				PE_Instr_En <= '0';
				iack        := '0';
				adresse     := DATAPTR;
				adresse_rd  := 0;
				timeout     := 0;
				dcount      <= 0;

			when Fillmem =>
				if Ramsel = '0' then
					PE_Ram_din  <= std_logic_vector(to_unsigned(dcount, 8));
					PE_Instr_En <= '0';
					dcount      <= dcount + 1;

					if dcount = 50 then
						bfill := bfill + 1;
						if bfill = 4 then
							RunState <= InitApp;
						else
							RunState <= nextfill;
						end if;
					else
						adresse  := adresse + 1;
						RunState <= Fillmem;
					end if;
				else
					-- Waiting for the memory to be released.
					timeout := timeout + 1;
					if timeout = 100 then
						RunState <= st_timeout;
					end if;
				end if;

			when nextfill =>
				-- Prepare the next memory block to fill.
				adresse     := 100 * bfill;
				dcount      <= 0;
				ct          := 0;
				RunState    <= Fillmem;
				PE_Instr_En <= '0';

			when InitApp =>
				-- Library initialisation; the procedure drives Libr and SRam.
				pMPI_Init(ct, Libr, Clk, SRam);
				PE_Instr_EN <= Libr.instr_en;
				adresse     := to_integer(unsigned(sram.addr_wr));
				adresse_rd  := to_integer(unsigned(sram.addr_rd));
				PE_ram_din  <= sram.data_in;

				if ct = 0 then
					RunState <= GetRank1;
				end if;

			when writeptr =>
				-- Writes adrToSet into the RAM as two words, low byte first,
				-- then returns to the state identified by fsrc.
				PE_Instr_En <= '0';
				if Ramsel = '0' then   -- make sure the bus is available
					if dcount = 0 then
						PE_RAM_Din <= AdrToSet(Word-1 downto 0);
						dcount     <= dcount + 1;
					elsif dcount = 1 then
						dcount     <= dcount + 1;
						adresse    := adresse + 1;   -- address of the next write
						PE_RAM_Din <= AdrToSet(15 downto 8);
					elsif dcount = 2 then
						-- This cycle only lets the pending RAM write complete.
						ret     := fsrc;
						dcount  <= 0;
						timeout := 0;

						if fsrc = 1 then
							RunState <= InitApp;
						elsif fsrc = 2 then
							RunState <= putdata;
						elsif fsrc = 3 then
							RunState <= getdata;
						else
							RunState <= start;
						end if;
					end if;
				end if;

			when InstrCopy =>
				if Lib_instr_ack = '1' then
					RunState    <= Writeptr;
					PE_instr_en <= '0';
					iack        := '1';
				else
					PE_Instr_en <= '1';
				end if;

			when InitCompleted =>
				adresse    := CORE_BASE_ADR;
				status_reg := status_reg or x"10";
				PE_Ram_din <= status_reg;
				PE_Instr_En <= '0';
				if Lib_Init = '1' then
					RunState <= GetRank1;
				end if;

			when GetRank1 =>
				pMPI_Comm_rank(ct, Libr, sram, MPI_COMM_WORLD, MyRank);
				if ct = 0 then
					RunState <= PutData2;
				end if;
				adresse_rd := to_integer(unsigned(sram.addr_rd));

			when GetRank2 =>
				adresse_rd := CORE_INIT_ADR + 1;
				if ramsel = '0' then
					RunState <= Getrank3;
				end if;

			when GetRank3 =>
				adresse_rd := CORE_INIT_ADR + 1;
				if ramsel = '0' then
					RunState <= putdata2;
				end if;

			when putdata =>
				-- Build the packet for the put: rank 0 targets rank 1, any other rank targets rank 0.
				if unsigned(MyRank) = 0 then
					Destrank := 1;
				else
					DestRank := 0;
				end if;
				adresse_rd := core_base_adr + Core_Rank2port_base + DestRank;

				PE_Instr_En <= '0';
				timeout     := 0;
				dcount      <= 0;
				fsrc        := 2;
				adrToSet    := std_logic_vector(to_unsigned(core_put_adr, 16));
				if ret /= fsrc then
					adresse  := core_base_adr + 2;
					RunState <= writeptr;
					ret      := 0;
				else
					if Lib_instr_ack /= '1' then
						RunState <= putdata2;
					end if;
				end if;

			when putdata2 =>
				if unsigned(MyRank) = 0 then
					Destrank := 1;
				else
					DestRank := 0;
				end if;
				dlen    := 10;   -- fixed length; the Datalen signal is not used here
				SrcAdr  := std_logic_vector(to_unsigned(DATAPTR, ADRLEN));
				DestAdr := X"2000";

				pMPI_put(ct, Libr, Clk, Sram, SrcAdr, Dlen, MPI_int, destrank, DestAdr1 & DestAdr, Dlen, Mpi_int, Default_win);
				adresse     := to_integer(unsigned(sram.addr_wr));
				adresse_rd  := to_integer(unsigned(sram.addr_rd));
				PE_Instr_EN <= Libr.instr_en;
				PE_ram_din  <= sram.data_in;
				dcount      <= ct;

				if ct = 0 then
					RunState <= GetData;
				end if;

			when putcompleted =>
				adresse_rd := core_put_adr + 6;
				if PE_Ram_do(0) = '1' then   -- put completed
					RunState <= GetData;
				end if;
				PE_Instr_En <= '0';

			when getdata =>
				-- Set up the pointer for the get request; writeptr is run first
				-- unless it has just completed for this state (ret = fsrc).
				timeout  := 0;
				dcount   <= 0;
				fsrc     := 3;
				adrToSet := std_logic_vector(to_unsigned(core_get_adr, 16));
				if ret /= fsrc then
					adresse  := core_base_adr + 2;
					RunState <= writeptr;
					ret      := 0;
				else
					adresse  := core_get_adr;
					RunState <= getdata2;
				end if;
				PE_Instr_En <= '0';

			when getdata2 =>
				-- Writes the get descriptor word by word (dcount 0..5), then
				-- raises the instruction pulse (dcount 6) and waits for the ack.
				if ramsel = '0' then
					if dcount <= 6 then
						if dcount = 0 then
							adresse    := core_get_adr;
							PE_Ram_din <= MPI_GET & std_logic_vector(to_unsigned(DestRank, 4));
						elsif dcount = 1 then
							adresse    := core_get_adr + dcount;
							PE_Ram_din <= Datalen;
						elsif dcount = 2 then
							adresse    := core_get_adr + dcount;
							PE_Ram_din <= SrcAdr1;
						elsif dcount = 3 then
							adresse    := core_get_adr + dcount;
							PE_Ram_din <= SrcAdr0;
						elsif dcount = 4 then
							adresse    := core_get_adr + dcount;
							PE_Ram_din <= DestAdr1;
						elsif dcount = 5 then
							adresse    := core_get_adr + dcount;
							PE_Ram_din <= DestAdr0;
						elsif dcount = 6 then
							adresse     := core_base_adr + 1;
							adresse_rd  := core_base_adr;
							PE_Instr_En <= '1';
							config_reg  := config_reg or x"01";
							PE_Ram_din  <= config_reg;   -- instruction pulse enable
							timeout     := 0;
						end if;
						dcount <= dcount + 1;
					end if;
				elsif PE_Instr_En = '0' then
					timeout := timeout + 1;
					if timeout >= 10 then   -- take the bus back by force if necessary
						ram_busy    <= '1';
						timeout     := 0;
						PE_Instr_En <= '0';
					end if;
				end if;

				if dcount >= 6 then
					Ram_busy <= '0';   -- release the bus and wait for the MPI core's answer
					if Lib_instr_ack = '1' then   -- instruction acknowledged
						PE_Instr_En <= '0';
						if Ramsel = '0' then
							adresse    := core_base_adr + 1;
							config_reg := config_reg and x"f6";
							PE_Ram_din <= config_reg;   -- bring the instruction pulse back to 0
							Ram_busy   <= '0';
							RunState   <= getcompleted;
						else
							Ram_busy <= '1';   -- force taking the bus
						end if;
					else
						timeout := timeout + 1;
						if timeout = 150 then
							RunState <= st_timeout;
						end if;
					end if;
				end if;

			when getcompleted =>
				adresse_rd  := core_get_adr + 6;
				PE_Instr_En <= '0';
				if PE_Ram_do(0) = '1' then   -- get completed
					if Ramsel = '0' then
						adresse    := core_base_adr + 1;
						config_reg := config_reg and x"f6";
						PE_Ram_din <= config_reg;   -- bring the instruction pulse back to 0
						RunState   <= Terminate;
					else
						-- Saturate: the counter is not evaluated in this state, and an
						-- unbounded increment would leave the 0..255 range of the variable.
						if timeout < 255 then
							timeout := timeout + 1;
						end if;
					end if;
				end if;

			when terminate =>
				RunState <= start;

			when st_timeout =>
				RunState <= start;

			end case;

			-- Register the addresses computed above for the PE side of the RAM.
			pe_Ram_addra <= STD_LOGIC_VECTOR(to_unsigned(adresse, 16));
			pe_Ram_addrb <= STD_LOGIC_VECTOR(to_unsigned(adresse_rd, 16));
		end if;
	end if;

 end process pPutGet;

-- majPutGet: combinational decode of RunState into the PE-side RAM controls
-- (PE_Ram_we, PE_Ram_ena, PE_Ram_enb). Some states also load the put/get
-- operand signals (srcadr*, destadr*, datalen) and dpid_i.
--
-- Changes relative to the original:
--   * PE_Ram_we is now driven ('0') in GetRank2 / GetRank3. It was left
--     unassigned there, which infers a latch on the write enable; port A is
--     disabled in those states, so the RAM contents are unaffected.
--   * the empty "if dcount=5" in getdata2 was removed (it had no statements and
--     read a signal that is not in the sensitivity list).
--   * states that drive identical outputs are grouped.
--
-- NOTE(review): srcadr0/1, destadr0/1, datalen and dpid_i are assigned only in
-- some states and therefore still hold their value in the others (latch
-- behaviour). getdata2 in pPutGet reads the values loaded in getdata, so they
-- are deliberately left as they were; consider moving them to the clocked
-- process.
majPutGet: process (RunState, pe_ram_do, sram, Lib_Init)
begin
	case RunState is

		-- No RAM access from the PE.
		when start | InstrCopy | terminate | st_timeout =>
			PE_Ram_we  <= '0';
			PE_Ram_ena <= '0';
			PE_Ram_enb <= '0';

		-- Write-only access.
		when fillmem | nextfill | writeptr | getdata2 =>
			PE_Ram_we  <= '1';
			PE_Ram_ena <= '1';
			PE_Ram_enb <= '0';

		-- RAM controls come from the sram record driven by the library procedure.
		when InitApp | GetRank1 =>
			PE_Ram_we  <= sram.we;
			PE_Ram_ena <= sram.ena;
			PE_Ram_enb <= sram.enb;

		when Initcompleted =>
			PE_Ram_ena <= Lib_Init;
			PE_Ram_we  <= '1';
			PE_Ram_enb <= '1';

		-- Read-only access (reading the rank).
		when GetRank2 | GetRank3 =>
			PE_Ram_we  <= '0';
			PE_Ram_ena <= '0';
			PE_Ram_enb <= '1';

		when putdata =>
			-- Operands of the put.
			srcadr0  <= X"00";
			srcadr1  <= X"01";
			destadr0 <= X"00";
			destadr1 <= X"02";
			datalen  <= std_logic_vector(to_unsigned(10, 8));
			-- Read-only access: reads the destination port number,
			-- which is held in the 4 least-significant bits.
			PE_Ram_we  <= '0';
			PE_Ram_ena <= '0';
			PE_Ram_enb <= '1';
			dpid_i     <= to_integer(unsigned(PE_ram_do(3 downto 0)));

		when putdata2 =>
			srcadr0  <= X"00";
			srcadr1  <= X"01";
			destadr0 <= X"00";
			destadr1 <= X"02";
			PE_Ram_we  <= sram.we;
			PE_Ram_ena <= sram.ena;
			PE_Ram_enb <= sram.enb;

		-- Both ports enabled: port B reads back the result.
		when putcompleted | getcompleted =>
			PE_Ram_we  <= '1';
			PE_Ram_ena <= '1';
			PE_Ram_enb <= '1';

		when getdata =>
			PE_Ram_we  <= '1';
			PE_Ram_ena <= '1';
			PE_Ram_enb <= '0';
			-- Operands of the get.
			srcadr0  <= X"50";
			srcadr1  <= X"01";
			destadr0 <= X"00";
			destadr1 <= X"03";
			datalen  <= std_logic_vector(to_unsigned(10, 8));

	end case;
end process majPutGet;
end Behavioral;

