----------------------------------------------------------------------------------
-- Company:
-- Engineer:
--
-- Create Date:    21:20:54 07/16/2012
-- Design Name:
-- Module Name:    PE - Behavioral
-- Project Name:
-- Target Devices:
-- Tool versions:
-- Description:
--   Processing element (PE) test driver. It owns a dual-port RAM that is
--   shared with an external core through a hold request/acknowledge
--   handshake, fills a few RAM blocks with a counting pattern and then runs a
--   state machine that invokes the pMPI_Init, pMPI_Comm_rank and pMPI_put
--   procedures and builds a "get" request word by word in RAM.
--
-- Dependencies:
--   NocLib.CoreTypes, work.Packet_type, work.MPI_RMA, component RAM_v
--
-- Revision:
-- Revision 0.01 - File Created
-- Additional Comments:
--
----------------------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
library NocLib ;
--use IEEE.STD_LOGIC_ARITH.ALL;
--use IEEE.STD_LOGIC_UNSIGNED.ALL;
use NocLib.CoreTypes.all;
use work.Packet_type.all;
use work.MPI_RMA.all;
use IEEE.NUMERIC_STD.ALL;

entity PE is
    Generic (DestId : natural );
    Port (
        Instruction         : out STD_LOGIC_VECTOR (Word-1 downto 0);
        Instruction_en      : out STD_LOGIC;
        Core_PushOut        : in  STD_LOGIC_VECTOR (Word-1 downto 0);
        clk                 : in  STD_LOGIC;
        reset               : in  STD_LOGIC;   -- synchronous, active high
        Core_RAM_Data_Out   : out STD_LOGIC_VECTOR (Word-1 downto 0);
        Core_RAM_Data_In    : in  STD_LOGIC_VECTOR (Word-1 downto 0);
        Core_RAM_WE         : in  STD_LOGIC;
        Core_RAM_EN         : in  STD_LOGIC;
        --Core_RAM_ENB : in STD_LOGIC;
        Core_RAM_ADDRESS_WR : in  STD_LOGIC_VECTOR (ADRLEN-1 downto 0);
        Core_RAM_ADDRESS_RD : in  STD_LOGIC_VECTOR (ADRLEN-1 downto 0);
        Core_Hold_req       : in  STD_LOGIC;
        Core_Hold_Ack       : out STD_LOGIC);
end PE;

architecture Behavioral of PE is

    -- Dual-port RAM: port A is the write port, port B is the read port.
    COMPONENT RAM_v
        generic (width : positive; size : positive);
        PORT(
            clka  : IN  std_logic;
            clkb  : IN  std_logic;
            wea   : IN  std_logic;
            ena   : IN  std_logic;
            enb   : IN  std_logic;
            addra : IN  std_logic_vector;
            addrb : IN  std_logic_vector;
            dia   : IN  std_logic_vector;
            dob   : OUT std_logic_vector
        );
    END COMPONENT;

    -- PE program data
    -- Interconnect signals.
    -- (Declaration that was fused into the comment above in the original file
    --  and is not referenced anywhere:
    --  signal datain : std_logic_vector(word-1 downto 0) := (others => '0');)
    signal ram_we, ram_ena, ram_enb, ramsel : std_logic := '0';
    signal pe_ram_we, pe_ram_ena, pe_ram_enb : std_logic;
    signal pe_instr_en, pe_hold_ack : std_logic := '0';
    signal ram_do, ram_din : std_logic_vector(word-1 downto 0) := (others => '0');
    signal pe_ram_do, pe_ram_din : std_logic_vector(word-1 downto 0) := (others => '0');
    signal ram_addra, ram_addrb : std_logic_vector(ADRLEN-1 downto 0);
    signal pe_ram_addra, pe_ram_addrb : std_logic_vector(ADRLEN-1 downto 0);
    signal sram : typ_dpram;
    signal SrcAdr0, SrcAdr1, destAdr0, destAdr1, Datalen : std_logic_vector(word-1 downto 0);
    signal dpid, dpid_i : natural range 0 to 15 := DestId;
    signal MyRank : std_logic_vector(3 downto 0);
    signal Libr : Core_io;            -- groups all IO signals of the library
    signal Lib_Ready : std_logic;     -- indicates that the function has finished executing
    signal Lib_instr_ack : std_logic; -- the instruction has been copied into the FIFO buffer
    signal Lib_Init : std_logic;      -- initialisation is finished

    -- State-machine (FSM) management signals
    type typ_mae is (start, Fillmem, NextFill, InitApp, InitCompleted,
                     GetRank1, GetRank2, GetRank3, writeptr, InstrCopy,
                     putdata, putdata2, putcompleted,
                     getdata, getdata2, getcompleted, terminate, st_timeout);
    signal dcount : natural range 0 to 255 := 0; -- counts the data words sent
    signal count, count_i : natural range 0 to 15 := 0;
    signal RunState : typ_mae;
    signal Ram_busy : std_logic := '0';

begin

    Inst_RAM_v: RAM_v
        generic map (width => word, size => ADRLEN)
        PORT MAP(
            clka  => clk,
            clkb  => clk,
            wea   => ram_we,
            ena   => ram_ena,
            enb   => ram_enb,
            addra => ram_addra,
            addrb => ram_addrb,
            dia   => ram_din,
            dob   => ram_do
        );

    --================================================================
    -- RAM multiplexer: ramsel='1' connects the RAM to the Core_RAM_* ports,
    -- any other value connects it to the PE state machine. The side that is
    -- not selected sees 'Z' on its read-data bus.
    Ram_mux: process (ramsel, pe_ram_addra, pe_ram_addrb,
                      Core_ram_address_rd, Core_ram_address_wr,
                      Core_ram_en, Core_ram_we, Core_ram_data_in,
                      pe_ram_ena, pe_ram_enb, Ram_do,
                      Pe_ram_din, Pe_ram_we)
    begin
        case ramsel is
            when '1' =>
                ram_addra         <= Core_ram_address_wr;
                ram_addrb         <= Core_ram_address_rd;
                ram_ena           <= Core_ram_en;
                ram_enb           <= Core_ram_en;
                ram_we            <= Core_ram_we;
                ram_din           <= Core_ram_data_in;
                pe_ram_do         <= (others => 'Z');
                Core_ram_data_out <= ram_do;
            when others =>
                ram_addra         <= pe_ram_addra;
                ram_addrb         <= pe_ram_addrb;
                ram_ena           <= pe_ram_ena;
                ram_enb           <= pe_ram_enb;
                ram_we            <= pe_ram_we;
                ram_din           <= pe_ram_din;
                Core_ram_data_out <= (others => 'Z');
                pe_ram_do         <= ram_do;
        end case;
    end process;

    Instruction_En <= PE_instr_EN; -- Libr.Instr_en;
    --******** To be changed **********
    --=== WARNING: removing the line below prevents this component from
    --    working correctly.
    instruction <= std_logic_vector(to_unsigned(Core_upper_adr, 8));
    dpid <= dpid_i;
    Lib_Instr_ack <= Core_Pushout(0); -- the instruction has been copied
    Lib_init      <= Core_Pushout(4); -- initialised
    -- pe_hold_req<=Core_hold_req;
    --Core_hold_ack<=pe_hold_ack;

    -- Hold handshake: while Core_Hold_Req is high the RAM is handed to the
    -- core (ramsel='1') unless the PE has flagged the RAM as busy.
    -- Purely synchronous, so only clk is in the sensitivity list.
    hold: process (clk)
    begin
        if rising_edge(clk) then
            if reset = '1' then
                -- Return the RAM to the PE on reset so that the mux selection
                -- and both acknowledge flags restart in a consistent state.
                Core_Hold_Ack <= '0';
                ramsel        <= '0';
                Pe_hold_ack   <= '0';
            elsif Core_Hold_Req = '1' then
                -- If the memory is busy the grant is withheld.
                ramsel        <= not ram_busy;
                Core_Hold_Ack <= not ram_busy;
                Pe_hold_ack   <= not ram_busy;
            else
                Core_Hold_Ack <= '0';
                ramsel        <= '0';
                Pe_hold_ack   <= '0';
            end if;
        end if;
    end process hold;

    --=======================================================================
    -- PE state machine
    --=======================================================================
    -- The first statements run on every activation of the process and mirror
    -- the handshake inputs into Libr / sram; everything under the clock edge
    -- is the registered state machine.
    pPutGet: process (clk, Core_Pushout, Core_Hold_req, PE_hold_Ack, RamSel, PE_Ram_do)
        constant DATAPTR : natural := 256;
        variable bfill, destrank : natural range 0 to 15;
        variable fsrc, ret : natural range 0 to 15 := 0;
        variable timeout, ct, dlen : natural range 0 to 255;
        variable adrToset, SrcAdr, DestAdr : std_logic_vector(ADRLEN-1 downto 0);
        variable iack : std_logic := '0';
        -- Bounded to what fits in an ADRLEN-bit address (the original upper
        -- bound of 65536 could not be represented by the conversion below).
        variable adresse, adresse_rd : natural range 0 to 2**ADRLEN - 1;
        variable status_reg, config_reg : std_logic_vector(Word-1 downto 0) := (others => '0');
    begin
        --=== Combinational part of the process ============================
        Libr.Instr_ack <= Core_pushout(0);
        Libr.InitOk    <= Core_pushout(4);
        Libr.Hold_Req  <= Core_Hold_req;
        Libr.Hold_Ack  <= Pe_Hold_Ack;
        Libr.RamSel    <= RamSel;
        sram.data_out  <= PE_ram_do;
        --=== End of the combinational part of the process =================

        if (clk'event and clk = '1') then
            if reset = '1' then
                RunState <= start;
            else
                -- The Libr.* / sram.data_out assignments above are executed on
                -- this same activation, so they are not repeated here.
                case RunState is

                    when start =>
                        if bfill = 0 then -- no memory block has been filled yet
                            RunState <= Fillmem;
                        end if;
                        Ram_busy    <= '0';
                        PE_Instr_En <= '0';
                        iack        := '0';
                        adresse     := DATAPTR;
                        adresse_rd  := 0;
                        timeout     := 0;
                        dcount      <= 0;

                    when Fillmem =>
                        if Ramsel = '0' then
                            PE_Ram_din  <= std_logic_vector(to_unsigned(dcount, 8)); -- x"0f";
                            PE_Instr_En <= '0';
                            dcount      <= dcount + 1;
                            if dcount = 50 then
                                bfill := bfill + 1;
                                if bfill = 4 then
                                    RunState <= InitApp;
                                else
                                    RunState <= nextfill;
                                end if;
                            else
                                adresse  := adresse + 1;
                                RunState <= Fillmem;
                            end if;
                        else -- waiting for the memory to be released
                            timeout := timeout + 1;
                            if timeout = 100 then
                                RunState <= st_timeout;
                            end if;
                        end if;

                    when nextfill => -- prepares the next memory block to fill
                        adresse     := 100 * bfill;
                        dcount      <= 0;
                        ct          := 0;
                        RunState    <= Fillmem;
                        PE_Instr_En <= '0';

                    when InitApp => -- Init code
                        pMPI_Init(ct, Libr, Clk, SRam);
                        PE_Instr_EN <= Libr.instr_en;
                        adresse     := to_integer(unsigned(sram.addr_wr));
                        adresse_rd  := to_integer(unsigned(sram.addr_rd));
                        PE_ram_din  <= sram.data_in;
                        --if Libr.InitOk='1' then
                        if ct = 0 then
                            RunState <= GetRank1;
                        end if;

                    when writeptr =>
                        PE_Instr_En <= '0';
                        if Ramsel = '0' then -- make sure the bus is available
                            if dcount = 0 then
                                PE_RAM_Din <= AdrToSet(Word-1 downto 0);
                                dcount     <= dcount + 1;
                                --adresse:=adresse+1; -- prepares the next write
                            elsif dcount = 1 then
                                dcount     <= dcount + 1;
                                adresse    := adresse + 1; -- prepares the next write
                                PE_RAM_Din <= AdrToSet(15 downto 8);
                            elsif dcount = 2 then
                                -- this cycle only flushes the RAM write buffer
                                ret     := fsrc;
                                dcount  <= 0;
                                timeout := 0;
                                -- return to the state that requested the write
                                case fsrc is
                                    when 1      => RunState <= InitApp;
                                    when 2      => RunState <= putdata;
                                    when 3      => RunState <= getdata;
                                    when others => RunState <= start;
                                end case;
                            end if;
                        end if;

                    when InstrCopy =>
                        if Lib_instr_ack = '1' then
                            RunState    <= Writeptr;
                            PE_instr_en <= '0';
                            iack        := '1';
                        else
                            PE_Instr_en <= '1';
                        end if;

                    when InitCompleted =>
                        adresse     := CORE_BASE_ADR;
                        status_reg  := status_reg or x"10";
                        PE_Ram_din  <= status_reg;
                        PE_Instr_En <= '0';
                        if Lib_Init = '1' then
                            RunState <= GetRank1;
                            --instruction(5)<='1';
                        end if;

                    when GetRank1 =>
                        pMPI_Comm_rank(ct, Libr, sram, MPI_COMM_WORLD, MyRank);
                        if ct = 0 then
                            RunState <= PutData2;
                        end if;
                        adresse_rd := to_integer(unsigned(sram.addr_rd));
                        -- adresse_rd:=CORE_INIT_ADR+1;
                        -- if ramsel='0' then
                        --     RunState<=getrank2;
                        -- end if;

                    when GetRank2 =>
                        adresse_rd := CORE_INIT_ADR + 1;
                        if ramsel = '0' then
                            RunState <= Getrank3;
                        end if;

                    when GetRank3 =>
                        adresse_rd := CORE_INIT_ADR + 1;
                        if ramsel = '0' then
                            RunState <= putdata2;
                        end if;

                    when putdata => -- build the packet for the Put
                        if unsigned(MyRank) = 0 then
                            Destrank := 1;
                        else
                            DestRank := 0;
                        end if;
                        adresse_rd  := core_base_adr + Core_Rank2port_base + DestRank;
                        PE_Instr_En <= '0';
                        timeout     := 0;
                        dcount      <= 0;
                        fsrc        := 2;
                        adrToSet    := std_logic_vector(to_unsigned(core_put_adr, ADRLEN));
                        if ret /= fsrc then
                            -- pointer not written yet: go through writeptr first
                            adresse  := core_base_adr + 2;
                            RunState <= writeptr;
                            ret      := 0;
                        elsif Lib_instr_ack /= '1' then
                            RunState <= putdata2;
                        end if;

                    when putdata2 =>
                        if unsigned(MyRank) = 0 then
                            Destrank := 1;
                        else
                            DestRank := 0;
                        end if;
                        dlen    := 10; --- to_integer(unsigned(datalen));
                        SrcAdr  := std_logic_vector(to_unsigned(DATAPTR, ADRLEN));
                        DestAdr := X"2000";
                        pMPI_put(ct, Libr, Clk, Sram, SrcAdr, Dlen, MPI_int, destrank,
                                 DestAdr1 & DestAdr, Dlen, Mpi_int, Default_win);
                        adresse     := to_integer(unsigned(sram.addr_wr));
                        adresse_rd  := to_integer(unsigned(sram.addr_rd));
                        PE_Instr_EN <= Libr.instr_en;
                        PE_ram_din  <= sram.data_in;
                        dcount      <= ct;
                        if ct = 0 then
                            RunState <= GetData;
                        end if;
                        -- Legacy hand-written put sequence, kept disabled:
                        -- if dcount<=6 then
                        --
                        -- elsif dcount=7 then
                        --
                        --     PE_Instr_En<='1';
                        --
                        -- end if;
                        -- elsif PE_instr_En='0' then
                        --     timeout:=timeout+1;
                        --     if timeout>=10 then -- forcibly take the bus back if necessary
                        --         ram_busy<='1';
                        --         timeout:=0;
                        --         PE_Instr_En<='0';
                        --     end if;
                        -- end if;
                        -- if dcount >=6 then
                        --     Ram_busy<='0'; -- release the bus and wait for the MPI core reply
                        --     if Lib_instr_ack='1' then -- Instruction ack
                        --         PE_Instr_En<='0';
                        --         if Ramsel='0' then
                        --             adresse:=core_base_adr+1;
                        --             config_reg:=config_reg and x"f6";
                        --             PE_Ram_din<=config_reg ; -- bring IPulse back to 0
                        --             Ram_busy<='0';
                        --             RunState<=putcompleted;
                        --         else
                        --
                        --             Ram_busy<='1'; -- force taking the bus
                        --         end if;
                        --     else
                        --
                        --         timeout:=timeout+1;
                        --         if timeout=150 then
                        --             RunState<=st_timeout;
                        --         end if;
                        --     end if;
                        -- end if;

                    when putcompleted =>
                        adresse_rd := core_put_adr + 6;
                        if PE_Ram_do(0) = '1' then -- Put completed
                            RunState <= GetData;
                        end if;
                        PE_Instr_En <= '0';

                    when getdata => -- set up the data-length word
                        --DestRank:=1;
                        timeout  := 0;
                        dcount   <= 0;
                        fsrc     := 3;
                        adrToSet := std_logic_vector(to_unsigned(core_get_adr, ADRLEN));
                        if ret /= fsrc then
                            -- pointer not written yet: go through writeptr first
                            adresse  := core_base_adr + 2;
                            RunState <= writeptr;
                            ret      := 0;
                        else
                            adresse  := core_get_adr;
                            RunState <= getdata2;
                        end if;
                        PE_Instr_En <= '0';

                    when getdata2 =>
                        -- Statement order matters below: later signal
                        -- assignments override earlier ones in the same cycle.
                        if ramsel = '0' then
                            if dcount <= 6 then
                                -- one word of the get request per cycle
                                if dcount = 0 then
                                    adresse    := core_get_adr;
                                    PE_Ram_din <= MPI_GET & std_logic_vector(to_unsigned(DestRank, 4));
                                elsif dcount = 1 then
                                    adresse    := core_get_adr + dcount;
                                    PE_Ram_din <= Datalen;
                                elsif dcount = 2 then
                                    adresse    := core_get_adr + dcount;
                                    PE_Ram_din <= SrcAdr1;
                                elsif dcount = 3 then
                                    adresse    := core_get_adr + dcount;
                                    PE_Ram_din <= SrcAdr0;
                                elsif dcount = 4 then
                                    adresse    := core_get_adr + dcount;
                                    PE_Ram_din <= DestAdr1;
                                elsif dcount = 5 then
                                    adresse    := core_get_adr + dcount;
                                    PE_Ram_din <= DestAdr0;
                                elsif dcount = 6 then
                                    adresse     := core_base_adr + 1;
                                    adresse_rd  := core_base_adr;
                                    PE_Instr_En <= '1';
                                    config_reg  := config_reg or x"01";
                                    PE_Ram_din  <= config_reg; -- instruction pulse enable
                                    timeout     := 0;
                                end if;
                                dcount <= dcount + 1;
                            end if;
                        elsif PE_Instr_En = '0' then
                            timeout := timeout + 1;
                            if timeout >= 10 then -- forcibly take the bus back if necessary
                                ram_busy    <= '1';
                                timeout     := 0;
                                PE_Instr_En <= '0';
                            end if;
                        end if;

                        if dcount >= 6 then
                            Ram_busy <= '0'; -- release the bus and wait for the MPI core reply
                            if Lib_instr_ack = '1' then -- Instruction ack
                                PE_Instr_En <= '0';
                                if Ramsel = '0' then
                                    adresse    := core_base_adr + 1;
                                    config_reg := config_reg and x"f6";
                                    PE_Ram_din <= config_reg; -- bring IPulse back to 0
                                    Ram_busy   <= '0';
                                    RunState   <= getcompleted;
                                else
                                    Ram_busy <= '1'; -- force taking the bus
                                end if;
                            else
                                timeout := timeout + 1;
                                if timeout = 150 then
                                    RunState <= st_timeout;
                                end if;
                            end if;
                        end if;

                    when getcompleted =>
                        adresse_rd  := core_get_adr + 6;
                        PE_Instr_En <= '0';
                        if PE_Ram_do(0) = '1' then -- get completed
                            if Ramsel = '0' then
                                adresse    := core_base_adr + 1;
                                config_reg := config_reg and x"f6";
                                PE_Ram_din <= config_reg; -- bring IPulse back to 0
                                RunState   <= Terminate;
                            elsif timeout < 255 then
                                -- Saturate: the counter is not tested in this
                                -- state, and an unbounded increment would leave
                                -- the declared 0..255 range.
                                timeout := timeout + 1;
                            end if;
                        end if;

                    when terminate =>
                        RunState <= start;

                    when st_timeout =>
                        --if ram_busy='1' then ... end if
                        RunState <= start;

                end case;

                -- Registered RAM addresses for the PE side of the mux.
                pe_Ram_addra <= STD_LOGIC_VECTOR(to_unsigned(adresse, ADRLEN));
                pe_Ram_addrb <= STD_LOGIC_VECTOR(to_unsigned(adresse_rd, ADRLEN));
            end if;
        end if;
    end process pPutGet;

    -- Per-state decode of the PE-side RAM controls (write enable, port A
    -- enable, port B enable).
    -- NOTE(review): srcadr0/1, destadr0/1, datalen and dpid_i are assigned in
    -- only some states, so they still hold their previous value elsewhere
    -- (level-sensitive storage). pPutGet reads them in later states, so they
    -- are deliberately left as they were; consider registering them.
    majPutGet: process (RunState, pe_ram_do, sram, Lib_Init)
    begin
        case RunState is
            when start =>
                PE_Ram_we  <= '0';
                PE_Ram_ena <= '0';
                PE_Ram_enb <= '0';

            when fillmem =>
                PE_Ram_we  <= '1';
                PE_Ram_ena <= '1';
                PE_Ram_enb <= '0';

            when nextfill =>
                PE_Ram_we  <= '1';
                PE_Ram_ena <= '1';
                PE_Ram_enb <= '0';

            when InitApp =>
                -- RAM controls come from the library procedure via sram
                PE_Ram_we  <= sram.we;
                PE_Ram_ena <= sram.ena;
                PE_Ram_enb <= sram.enb;

            when Initcompleted =>
                PE_Ram_ena <= Lib_Init;
                PE_Ram_we  <= '1';
                PE_Ram_enb <= '1';

            when GetRank1 =>
                -- RAM controls come from the library procedure via sram
                PE_Ram_we  <= sram.we;
                PE_Ram_ena <= sram.ena;
                PE_Ram_enb <= sram.enb;

            when GetRank2 =>
                -- actual read of the rank; write enable driven explicitly so
                -- that no latch is inferred on PE_Ram_we
                PE_Ram_we  <= '0';
                PE_Ram_ena <= '0';
                PE_Ram_enb <= '1';
                --MyRank<=PE_ram_do(3 downto 0);

            when GetRank3 =>
                -- actual read of the rank; write enable driven explicitly so
                -- that no latch is inferred on PE_Ram_we
                PE_Ram_we  <= '0';
                PE_Ram_ena <= '0';
                PE_Ram_enb <= '1';
                --MyRank<=PE_ram_do(3 downto 0);

            when writeptr =>
                PE_Ram_we  <= '1'; -- write into the RAM
                PE_Ram_ena <= '1';
                PE_Ram_enb <= '0';

            when InstrCopy => -- instruction copy
                PE_Ram_we  <= '0';
                PE_Ram_ena <= '0';
                PE_Ram_enb <= '0';

            when putdata => -- set up the data-length word
                srcadr0    <= X"00";
                srcadr1    <= X"01";
                destadr0   <= X"00";
                destadr1   <= X"02";
                PE_Ram_we  <= '0';
                PE_Ram_ena <= '0';
                PE_Ram_enb <= '1'; -- read of the destination port number
                datalen    <= std_logic_vector(to_unsigned(10, 8));
                -- the port is held in the 4 least-significant bits
                dpid_i     <= to_integer(unsigned(PE_ram_do(3 downto 0)));

            when putdata2 =>
                srcadr0    <= X"00";
                srcadr1    <= X"01";
                destadr0   <= X"00";
                destadr1   <= X"02";
                PE_Ram_we  <= sram.we;
                PE_Ram_ena <= sram.ena;
                PE_Ram_enb <= sram.enb;

            when putcompleted =>
                PE_Ram_we  <= '1';
                PE_Ram_ena <= '1';
                PE_Ram_enb <= '1'; -- read of the result

            when getdata =>
                PE_Ram_we  <= '1';
                PE_Ram_ena <= '1';
                PE_Ram_enb <= '0';
                srcadr0    <= X"50";
                srcadr1    <= X"01";
                destadr0   <= X"00";
                destadr1   <= X"03";
                datalen    <= std_logic_vector(to_unsigned(10, 8));

            when getdata2 =>
                PE_Ram_we  <= '1'; -- write into the RAM
                PE_Ram_ena <= '1';
                PE_Ram_enb <= '0';

            when getcompleted =>
                PE_Ram_we  <= '1';
                PE_Ram_ena <= '1';
                PE_Ram_enb <= '1'; -- read of the result

            when terminate =>
                PE_Ram_we  <= '0';
                PE_Ram_ena <= '0';
                PE_Ram_enb <= '0';

            when st_timeout =>
                PE_Ram_we  <= '0';
                PE_Ram_ena <= '0';
                PE_Ram_enb <= '0';
        end case;
    end process majPutGet;

end Behavioral;