blob: c60cefb73a48161a7d95e0286504ded9d8b1c2d8 [file] [log] [blame]
/*
* This file is part of the coreboot project.
*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/******************************************************************************
Description: Receiver En and DQS Timing Training feature for DDR 3 MCT
******************************************************************************/
/* Forward declarations for the file-local helpers defined further down. */

/* Software receiver enable training loop for one node. */
static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Pass);
/* Records the current delay for byte lanes flagged in DqsRcvEn_Pass. */
static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
u8 rcvrEnDly, u8 Channel,
u8 receiver, u8 Pass);
/* Compares eight bytes read back from addr against a test pattern. */
static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat,
u32 addr, u8 channel,
u8 pattern, u8 Pass);
/* Calls InitDQSPos4RcvrEn_D() for channels 0 and 1. */
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat);
/* Writes initial DQS timing values for one channel. */
static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Channel);
/* Derives the ECC lane receiver enable delay from two data lanes. */
static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Channel);
/* Thin wrapper around mct_SetRcvrEnDly_D(). */
static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
u8 RcvrEnDly, u8 where,
u8 Channel, u8 Receiver,
u32 dev, u32 index_reg,
u8 Addl_Index, u8 Pass);
/* Computes and programs the MaxRdLatency field of register 0x78. */
static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly);
/* Phy fence training sequence for one DCT. */
static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 dct);
/* Clears the DqsRcvEnTrain bit in register 0x78 of each active DCT. */
static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat);
/*
 * Reference patterns used by mct_CompareTestPatternQW0_D(): patterns 0 and 1
 * are selected during the first pass, pattern 2 otherwise.
 *
 * Warning: These must be located so they do not cross a logical 16-bit
 * segment boundary!
 */
static const u32 TestPattern0_D[16] = {
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
	0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
};

static const u32 TestPattern1_D[16] = {
	0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555,
	0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555, 0x55555555,
};

static const u32 TestPattern2_D[16] = {
	0x12345678, 0x87654321, 0x23456789, 0x98765432, 0x59385824, 0x30496724, 0x24490795, 0x99938733,
	0x40385642, 0x38465245, 0x29432163, 0x05067894, 0x12349045, 0x98723467, 0x12387634, 0x34587623,
};
/*
 * Copy two 16-dword training patterns into the caller-supplied scratch
 * buffer and publish their addresses in the DCT status structure.
 *
 * pMCTstat: unused here.
 * pDCTstat: receives PtrPatternBufA / PtrPatternBufB.
 * buffer:   scratch area; the first copy starts at the next 16-byte
 *           boundary above it, the second copy 32 dwords after the first.
 * pass:     forwarded to the SetupDqsPattern_1Pass* helpers.
 *
 * Note that buffer A is filled from SetupDqsPattern_1PassB() and buffer B
 * from SetupDqsPattern_1PassA().
 */
static void SetupRcvrPattern(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u32 *buffer, u8 pass)
{
	u32 *dst_a = (u32 *)(((u32)buffer + 0x10) & (0xfffffff0));
	u32 *dst_b = dst_a + 32;
	u32 *src_a = (u32 *)SetupDqsPattern_1PassB(pass);
	u32 *src_b = (u32 *)SetupDqsPattern_1PassA(pass);
	u8 dw = 0;

	while (dw < 16) {
		dst_a[dw] = src_a[dw];
		dst_b[dw] = src_b[dw];
		dw++;
	}

	pDCTstat->PtrPatternBufA = (u32)dst_a;
	pDCTstat->PtrPatternBufB = (u32)dst_b;
}
/*
 * Public entry point for receiver enable training of one node.
 * The software training routine is run only when
 * mct_checkNumberOfDqsRcvEn_1Pass() returns non-zero for the given pass.
 */
void mct_TrainRcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Pass)
{
	if (!mct_checkNumberOfDqsRcvEn_1Pass(Pass))
		return;

	dqsTrainRcvrEn_SW(pMCTstat, pDCTstat, Pass);
}
/*
 * Software receiver enable (DqsRcvEn) training for one node.
 *
 * For both channels and every enabled receiver pair, the receiver enable
 * delay is swept upward from mct_Get_Start_RcvrEnDly_1Pass(Pass). At each
 * step test patterns are written, read back and compared; the sweep stops
 * when mct_SavePassRcvEnDly_D() reports a stop condition or the limit 0xff
 * is reached. The result is programmed via mct_SetFinalRcvrEnDly_D() and
 * the largest delay seen is passed to mct_SetMaxLatency_D() for both
 * channels.
 *
 * While training runs, CR4 bit 9 and HWCR bit 17 are set and DIMM ECC is
 * disabled through mct_DisableDimmEccEn_D(); these are undone at the end.
 *
 * pMCTstat: forwarded to the helpers.
 * pDCTstat: per-node state; ErrStatus, ErrCode, CSTrainFail, DimmTrainFail
 *           and Channel are updated here.
 * Pass:     FirstPass or another pass value; selects initialisation and
 *           the final cleanup.
 *
 * Nothing is returned; failures are reported through pDCTstat.
 */
static void dqsTrainRcvrEn_SW(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Pass)
{
u8 Channel, RcvrEnDly, RcvrEnDlyRmin;
u8 Test0, Test1, CurrTest, CurrTestSide0, CurrTestSide1;
u8 CTLRMaxDelay, _2Ranks, PatternA, PatternB;
u8 Addl_Index = 0;
u8 Receiver;
u8 _DisableDramECC = 0, _Wrap32Dis = 0, _SSE2 = 0;
u8 RcvrEnDlyLimit, Final_Value, MaxDelay_CH[2];
u32 TestAddr0, TestAddr1, TestAddr0B, TestAddr1B;
u32 PatternBuffer[64+4]; /* FIXME: need increase 8? */
/* NOTE(review): Errors is only ever OR-ed into; it is not read in this function. */
u32 Errors;
u32 val;
u32 reg;
u32 dev;
u32 index_reg;
u32 ch_start, ch_end, ch;
u32 msr;
u32 cr4;
u32 lo, hi;
u8 valid;
u32 tmp;
u8 LastTest;
print_debug_dqs("\nTrainRcvEn: Node", pDCTstat->Node_ID, 0);
print_debug_dqs("TrainRcvEn: Pass", Pass, 0);
dev = pDCTstat->dev_dct;
/* One DCT is programmed in ganged mode, two otherwise. */
ch_start = 0;
if(!pDCTstat->GangedMode) {
ch_end = 2;
} else {
ch_end = 1;
}
/* Preload bits 31:22 of register 0x78 (+0x100 per DCT) with 0x0c8. */
for (ch = ch_start; ch < ch_end; ch++) {
reg = 0x78 + (0x100 * ch);
val = Get_NB32(dev, reg);
val &= ~(0x3ff << 22);
val |= (0x0c8 << 22); /* Max Rd Lat */
Set_NB32(dev, reg, val);
}
Final_Value = 1;
if (Pass == FirstPass) {
mct_InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat);
} else {
pDCTstat->DimmTrainFail = 0;
pDCTstat->CSTrainFail = ~pDCTstat->CSPresent;
}
/* Remember whether CR4 bit 9 was already set so it can be restored. */
cr4 = read_cr4();
if(cr4 & ( 1 << 9)) { /* save the old value */
_SSE2 = 1;
}
cr4 |= (1 << 9); /* OSFXSR enable SSE2 */
write_cr4(cr4);
msr = HWCR;
_RDMSR(msr, &lo, &hi);
/* FIXME: Why use SSEDIS */
if(lo & (1 << 17)) { /* save the old value */
_Wrap32Dis = 1;
}
lo |= (1 << 17); /* HWCR.wrap32dis */
/* NOTE(review): bit 15 is cleared here but only bit 17 is restored at the end. */
lo &= ~(1 << 15); /* SSEDIS */
_WRMSR(msr, lo, hi); /* Setting wrap32dis allows 64-bit memory references in real mode */
_DisableDramECC = mct_DisableDimmEccEn_D(pMCTstat, pDCTstat);
SetupRcvrPattern(pMCTstat, pDCTstat, PatternBuffer, Pass);
Errors = 0;
dev = pDCTstat->dev_dct;
/* CTLRMaxDelay is not reset per channel, so MaxDelay_CH[1] >= MaxDelay_CH[0]. */
CTLRMaxDelay = 0;
for (Channel = 0; Channel < 2; Channel++) {
print_debug_dqs("\tTrainRcvEn51: Node ", pDCTstat->Node_ID, 1);
print_debug_dqs("\tTrainRcvEn51: Channel ", Channel, 1);
pDCTstat->Channel = Channel;
MaxDelay_CH[Channel] = 0;
index_reg = 0x98 + 0x100 * Channel;
/* mct_InitReceiver_D() returns 8 when the DCT has no valid DIMMs, which skips the loop below. */
Receiver = mct_InitReceiver_D(pDCTstat, Channel);
/* There are four receiver pairs, loosely associated with chipselects. */
for (; Receiver < 8; Receiver += 2) {
Addl_Index = (Receiver >> 1) * 3 + 0x10;
LastTest = DQS_FAIL;
/* mct_ModifyIndex_D */
RcvrEnDlyRmin = RcvrEnDlyLimit = 0xff;
print_debug_dqs("\t\tTrainRcvEnd52: index ", Addl_Index, 2);
if(!mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver)) {
continue;
}
TestAddr0 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver, &valid);
if(!valid) { /* Address not supported on current CS */
continue;
}
TestAddr0B = TestAddr0 + (BigPagex8_RJ8 << 3);
/* The odd receiver of the pair is tested too when its rank is enabled. */
if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, Receiver+1)) {
TestAddr1 = mct_GetRcvrSysAddr_D(pMCTstat, pDCTstat, Channel, Receiver+1, &valid);
if(!valid) { /* Address not supported on current CS */
continue;
}
TestAddr1B = TestAddr1 + (BigPagex8_RJ8 << 3);
_2Ranks = 1;
} else {
_2Ranks = TestAddr1 = TestAddr1B = 0;
}
print_debug_dqs("\t\tTrainRcvEn53: TestAddr0 ", TestAddr0, 2);
print_debug_dqs("\t\tTrainRcvEn53: TestAddr0B ", TestAddr0B, 2);
print_debug_dqs("\t\tTrainRcvEn53: TestAddr1 ", TestAddr1, 2);
print_debug_dqs("\t\tTrainRcvEn53: TestAddr1B ", TestAddr1B, 2);
/*
* Get starting RcvrEnDly value
*/
RcvrEnDly = mct_Get_Start_RcvrEnDly_1Pass(Pass);
/* mct_GetInitFlag_D*/
if (Pass == FirstPass) {
pDCTstat->DqsRcvEn_Pass = 0;
} else {
pDCTstat->DqsRcvEn_Pass=0xFF;
}
pDCTstat->DqsRcvEn_Saved = 0;
while(RcvrEnDly < RcvrEnDlyLimit) { /* sweep Delay value here */
print_debug_dqs("\t\t\tTrainRcvEn541: RcvrEnDly ", RcvrEnDly, 3);
/* callback not required
if(mct_AdjustDelay_D(pDCTstat, RcvrEnDly))
goto skipDly;
*/
/* Odd steps get another pattern such that even
and odd steps alternate. The pointers to the
patterns will be swapped at the end of the loop
so that they correspond. */
if(RcvrEnDly & 1) {
PatternA = 1;
PatternB = 0;
} else {
/* Even step */
PatternA = 0;
PatternB = 1;
}
mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0, PatternA); /* rank 0 of DIMM, testpattern 0 */
mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B, PatternB); /* rank 0 of DIMM, testpattern 1 */
if(_2Ranks) {
mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1, PatternA); /*rank 1 of DIMM, testpattern 0 */
mct_Write1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B, PatternB); /*rank 1 of DIMM, testpattern 1 */
}
/* Program the candidate delay (FinalValue = 0: use RcvrEnDly as given). */
mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, 0, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
CurrTest = DQS_FAIL;
CurrTestSide0 = DQS_FAIL;
CurrTestSide1 = DQS_FAIL;
mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0); /*cache fills */
Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0, Channel, PatternA, Pass);/* ROM vs cache compare */
proc_IOCLFLUSH_D(TestAddr0);
ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
print_debug_dqs("\t\t\tTrainRcvEn542: Test0 result ", Test0, 3);
/* != 0x00 mean pass */
/* The B address is only checked when the A address passed. */
if(Test0 == DQS_PASS) {
mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr0B); /*cache fills */
/* ROM vs cache compare */
Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr0B, Channel, PatternB, Pass);
proc_IOCLFLUSH_D(TestAddr0B);
ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
print_debug_dqs("\t\t\tTrainRcvEn543: Test1 result ", Test1, 3);
if(Test1 == DQS_PASS) {
CurrTestSide0 = DQS_PASS;
}
}
if(_2Ranks) {
mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1); /*cache fills */
/* ROM vs cache compare */
Test0 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1, Channel, PatternA, Pass);
proc_IOCLFLUSH_D(TestAddr1);
ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
print_debug_dqs("\t\t\tTrainRcvEn544: Test0 result ", Test0, 3);
if(Test0 == DQS_PASS) {
mct_Read1LTestPattern_D(pMCTstat, pDCTstat, TestAddr1B); /*cache fills */
/* ROM vs cache compare */
Test1 = mct_CompareTestPatternQW0_D(pMCTstat, pDCTstat, TestAddr1B, Channel, PatternB, Pass);
proc_IOCLFLUSH_D(TestAddr1B);
ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
print_debug_dqs("\t\t\tTrainRcvEn545: Test1 result ", Test1, 3);
if(Test1 == DQS_PASS) {
CurrTestSide1 = DQS_PASS;
}
}
}
/* The step passes only if every tested rank passed. */
if(_2Ranks) {
if ((CurrTestSide0 == DQS_PASS) && (CurrTestSide1 == DQS_PASS)) {
CurrTest = DQS_PASS;
}
} else if (CurrTestSide0 == DQS_PASS) {
CurrTest = DQS_PASS;
}
/* record first pass DqsRcvEn to stack */
valid = mct_SavePassRcvEnDly_D(pDCTstat, RcvrEnDly, Channel, Receiver, Pass);
/* Break(1:RevF,2:DR) or not(0) FIXME: This comment doesn't make sense */
/* Stop on return value 2, or on 1 when the previous step failed. */
if(valid == 2 || (LastTest == DQS_FAIL && valid == 1)) {
RcvrEnDlyRmin = RcvrEnDly;
break;
}
LastTest = CurrTest;
/* swap the rank 0 pointers */
tmp = TestAddr0;
TestAddr0 = TestAddr0B;
TestAddr0B = tmp;
/* swap the rank 1 pointers */
tmp = TestAddr1;
TestAddr1 = TestAddr1B;
TestAddr1B = tmp;
print_debug_dqs("\t\t\tTrainRcvEn56: RcvrEnDly ", RcvrEnDly, 3);
RcvrEnDly++;
} /* while RcvrEnDly */
print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDly ", RcvrEnDly, 2);
print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyRmin ", RcvrEnDlyRmin, 3);
print_debug_dqs("\t\tTrainRcvEn61: RcvrEnDlyLimit ", RcvrEnDlyLimit, 3);
/* RcvrEnDlyRmin still equals the limit when the sweep never hit the break above. */
if(RcvrEnDlyRmin == RcvrEnDlyLimit) {
/* no passing window */
pDCTstat->ErrStatus |= 1 << SB_NORCVREN;
Errors |= 1 << SB_NORCVREN;
pDCTstat->ErrCode = SC_FatalErr;
}
if(RcvrEnDly > (RcvrEnDlyLimit - 1)) {
/* passing window too narrow, too far delayed*/
pDCTstat->ErrStatus |= 1 << SB_SmallRCVR;
Errors |= 1 << SB_SmallRCVR;
pDCTstat->ErrCode = SC_FatalErr;
RcvrEnDly = RcvrEnDlyLimit - 1;
pDCTstat->CSTrainFail |= 1 << Receiver;
pDCTstat->DimmTrainFail |= 1 << (Receiver + Channel);
}
/* CHB_D0_B0_RCVRDLY set in mct_Average_RcvrEnDly_Pass */
mct_Average_RcvrEnDly_Pass(pDCTstat, RcvrEnDly, RcvrEnDlyLimit, Channel, Receiver, Pass);
/* Final_Value is 1, so the per-lane values stored in CH_D_B_RCVRDLY are programmed. */
mct_SetFinalRcvrEnDly_D(pDCTstat, RcvrEnDly, Final_Value, Channel, Receiver, dev, index_reg, Addl_Index, Pass);
if(pDCTstat->ErrStatus & (1 << SB_SmallRCVR)) {
Errors |= 1 << SB_SmallRCVR;
}
/* NOTE(review): RcvrEnDly is a u8, so this addition can wrap. */
RcvrEnDly += Pass1MemClkDly;
if(RcvrEnDly > CTLRMaxDelay) {
CTLRMaxDelay = RcvrEnDly;
}
} /* while Receiver */
MaxDelay_CH[Channel] = CTLRMaxDelay;
} /* for Channel */
/* Use the larger of the two per-channel maxima for both channels. */
CTLRMaxDelay = MaxDelay_CH[0];
if (MaxDelay_CH[1] > CTLRMaxDelay)
CTLRMaxDelay = MaxDelay_CH[1];
for (Channel = 0; Channel < 2; Channel++) {
mct_SetMaxLatency_D(pDCTstat, Channel, CTLRMaxDelay); /* program Ch A/B MaxAsyncLat to correspond with max delay */
}
/* index_reg and Addl_Index hold whatever the last loop iteration left in them. */
ResetDCTWrPtr_D(dev, index_reg, Addl_Index);
if(_DisableDramECC) {
mct_EnableDimmEccEn_D(pMCTstat, pDCTstat, _DisableDramECC);
}
if (Pass == FirstPass) {
/*Disable DQSRcvrEn training mode */
mct_DisableDQSRcvEn_D(pDCTstat);
}
/* Clear the bits set at entry only if they were clear on entry. */
if(!_Wrap32Dis) {
msr = HWCR;
_RDMSR(msr, &lo, &hi);
lo &= ~(1<<17); /* restore HWCR.wrap32dis */
_WRMSR(msr, lo, hi);
}
if(!_SSE2){
cr4 = read_cr4();
cr4 &= ~(1<<9); /* restore cr4.OSFXSR */
write_cr4(cr4);
}
/* Debug dump of the per-channel MaxRdLat values (shadows the outer Channel). */
#if DQS_TRAIN_DEBUG > 0
{
u8 Channel;
printk(BIOS_DEBUG, "TrainRcvrEn: CH_MaxRdLat:\n");
for(Channel = 0; Channel<2; Channel++) {
printk(BIOS_DEBUG, "Channel:%x: %x\n",
Channel, pDCTstat->CH_MaxRdLat[Channel]);
}
}
#endif
/* Debug dump of the stored per-lane receiver enable delays (shadows outer locals). */
#if DQS_TRAIN_DEBUG > 0
{
u8 val;
u8 Channel, Receiver;
u8 i;
u8 *p;
printk(BIOS_DEBUG, "TrainRcvrEn: CH_D_B_RCVRDLY:\n");
for(Channel = 0; Channel < 2; Channel++) {
printk(BIOS_DEBUG, "Channel:%x\n", Channel);
for(Receiver = 0; Receiver<8; Receiver+=2) {
printk(BIOS_DEBUG, "\t\tReceiver:%x:", Receiver);
p = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver>>1];
for (i=0;i<8; i++) {
val = p[i];
printk(BIOS_DEBUG, "%x ", val);
}
printk(BIOS_DEBUG, "\n");
}
}
}
#endif
printk(BIOS_DEBUG, "TrainRcvrEn: Status %x\n", pDCTstat->Status);
printk(BIOS_DEBUG, "TrainRcvrEn: ErrStatus %x\n", pDCTstat->ErrStatus);
printk(BIOS_DEBUG, "TrainRcvrEn: ErrCode %x\n", pDCTstat->ErrCode);
printk(BIOS_DEBUG, "TrainRcvrEn: Done\n\n");
}
/*
 * Return the first receiver index to train on the given DCT:
 * 8 when DIMMValidDCT[dct] is zero, otherwise 0.
 */
u8 mct_InitReceiver_D(struct DCTStatStruc *pDCTstat, u8 dct)
{
	return (pDCTstat->DIMMValidDCT[dct] == 0) ? 8 : 0;
}
/*
 * Program the final DqsRcvEnDly into the additional-index register of a
 * DQS receiver. All arguments are handed unchanged to mct_SetRcvrEnDly_D(),
 * with 'where' taking the place of its FinalValue argument.
 */
static void mct_SetFinalRcvrEnDly_D(struct DCTStatStruc *pDCTstat,
					u8 RcvrEnDly, u8 where,
					u8 Channel, u8 Receiver,
					u32 dev, u32 index_reg,
					u8 Addl_Index, u8 Pass)
{
	mct_SetRcvrEnDly_D(pDCTstat, RcvrEnDly, where, Channel, Receiver,
			   dev, index_reg, Addl_Index, Pass);
}
/*
 * Clear the DqsRcvEnTrain bit in register 0x78 of every active DCT
 * (one DCT in ganged mode, two otherwise).
 */
static void mct_DisableDQSRcvEn_D(struct DCTStatStruc *pDCTstat)
{
	u32 dev = pDCTstat->dev_dct;
	u8 dct_count = pDCTstat->GangedMode ? 1 : 2;
	u8 dct = 0;

	while (dct < dct_count) {
		u32 reg = 0x78 + 0x100 * dct;
		u32 val = Get_NB32(dev, reg);

		val &= ~(1 << DqsRcvEnTrain);
		Set_NB32(dev, reg, val);
		dct++;
	}
}
/* mct_ModifyIndex_D
* Function only used once so it was inlined.
*/
/* mct_GetInitFlag_D
* Function only used once so it was inlined.
*/
/*
 * Write a receiver enable delay into the eight byte-lane fields of one
 * receiver.
 *
 * RcvrEnDly:  delay to program when FinalValue is zero; the value 0xFE
 *             additionally sets SB_DQSRcvLimit in pDCTstat->Status.
 * FinalValue: when non-zero, each lane uses the value stored in
 *             CH_D_B_RCVRDLY[Channel][Receiver >> 1] instead of RcvrEnDly.
 * dev, index_reg, Addl_Index: select the indexed register set to access.
 * Pass:       unused here.
 *
 * Two lanes share one register: even lanes occupy bits 7:0 and odd lanes
 * bits 23:16. Each lane performs its own read-modify-write.
 */
void mct_SetRcvrEnDly_D(struct DCTStatStruc *pDCTstat, u8 RcvrEnDly,
			u8 FinalValue, u8 Channel, u8 Receiver, u32 dev,
			u32 index_reg, u8 Addl_Index, u8 Pass)
{
	u8 lane;

	if (RcvrEnDly == 0xFE) {
		/* set the boundary flag */
		pDCTstat->Status |= 1 << SB_DQSRcvLimit;
	}

	/* DimmOffset not needed for CH_D_B_RCVRDLY array */
	for (lane = 0; lane < 8; lane++) {
		u32 index;
		u32 val;
		u8 shift = (lane & 1) ? 16 : 0;

		if (FinalValue) {
			u8 *stored = pDCTstat->CH_D_B_RCVRDLY[Channel][Receiver >> 1];

			RcvrEnDly = stored[lane];
		}

		/* register index from table, offset to this receiver */
		index = Table_DQSRcvEn_Offset[lane >> 1];
		index += Addl_Index;

		val = Get_NB32_index_wait(dev, index_reg, index);
		val &= ~(0xFF << shift);
		val |= (RcvrEnDly << shift);
		Set_NB32_index_wait(dev, index_reg, index, val);
	}
}
/*
 * Compute the maximum read latency for a channel and program it into
 * bits 31:22 of register 0x78 (+0x100 for channel 1).
 *
 * pDCTstat:    per-node state; CH_MaxRdLat[Channel] is updated (and
 *              CH_MaxRdLat[1] as well in ganged mode).
 * Channel:     DCT to program; forced to 0 in ganged mode.
 * DQSRcvEnDly: worst-case receiver enable delay; only bits 7:5 are used.
 *
 * The sub-total is accumulated in 1/2 MEMCLK units, scaled to 1/4 MEMCLK,
 * converted to NCLKs with a rounded-up division, and finally 5 NCLKs are
 * added.
 */
static void mct_SetMaxLatency_D(struct DCTStatStruc *pDCTstat, u8 Channel, u8 DQSRcvEnDly)
{
	u32 dev;
	u32 reg;
	u16 SubTotal;
	u16 remainder;
	u32 index_reg;
	u32 reg_off;
	u32 val;
	u32 valx;

	if (pDCTstat->GangedMode)
		Channel = 0;

	dev = pDCTstat->dev_dct;
	reg_off = 0x100 * Channel;
	index_reg = 0x98 + reg_off;

	/* Multiply the CAS Latency by two to get a number of 1/2 MEMCLKs units.*/
	val = Get_NB32(dev, 0x88 + reg_off);
	SubTotal = ((val & 0x0f) + 4) << 1;	/* SubTotal is 1/2 Memclk unit */

	/* If registered DIMMs are being used then
	 * add 1 MEMCLK to the sub-total.
	 */
	val = Get_NB32(dev, 0x90 + reg_off);
	if (!(val & (1 << UnBuffDimm)))
		SubTotal += 2;

	/* If the address prelaunch is setup for 1/2 MEMCLKs then
	 * add 1, else add 2 to the sub-total.
	 * if (AddrCmdSetup || CsOdtSetup || CkeSetup) then K := K + 2;
	 */
	val = Get_NB32_index_wait(dev, index_reg, 0x04);
	if (!(val & 0x00202020))
		SubTotal += 1;
	else
		SubTotal += 2;

	/* If the F2x[1, 0]78[RdPtrInit] field is 4, 5, 6 or 7 MEMCLKs,
	 * then add 4, 3, 2, or 1 MEMCLKs, respectively to the sub-total. */
	val = Get_NB32(dev, 0x78 + reg_off);
	SubTotal += 8 - (val & 0x0f);

	/* Convert bits 7-5 (also referred to as the coarse delay) of
	 * the current (or worst case) DQS receiver enable delay to
	 * 1/2 MEMCLKs units, rounding up, and add this to the sub-total.
	 */
	SubTotal += DQSRcvEnDly >> 5;	/*BOZO-no rounding up */

	/* Add 5.5 to the sub-total. 5.5 represents part of the
	 * processor specific constant delay value in the DRAM
	 * clock domain.
	 */
	SubTotal <<= 1;		/*scale 1/2 MemClk to 1/4 MemClk */
	SubTotal += 11;		/*add 5.5 1/2MemClk */

	/* Convert the sub-total (in 1/2 MEMCLKs) to northbridge
	 * clocks (NCLKs) as follows (assuming DDR400 and assuming
	 * that no P-state or link speed changes have occurred).
	 */
	/* New formula:
	 * SubTotal *= 3*(Fn2xD4[NBFid]+4)/(3+Fn2x94[MemClkFreq])/2 */
	val = Get_NB32(dev, 0x94 + reg_off);
	/* SubTotal div 4 to scale 1/4 MemClk back to MemClk */
	val &= 7;
	if (val >= 3)
		val <<= 1;
	else
		val += 3;
	valx = val << 2;	/* always >= 12, so the division below is safe */

	val = Get_NB32(pDCTstat->dev_nbmisc, 0xD4);
	SubTotal *= ((val & 0x1f) + 4) * 3;

	/* Rounded-up division: the remainder has to be taken from the
	 * dividend, i.e. before SubTotal is replaced by the quotient.
	 * Testing (quotient % valx) afterwards rounds on an unrelated value.
	 */
	remainder = SubTotal % valx;
	SubTotal /= valx;
	if (remainder)
		SubTotal++;

	/* Add 5 NCLKs to the sub-total. 5 represents part of the
	 * processor specific constant value in the northbridge
	 * clock domain.
	 */
	SubTotal += 5;

	pDCTstat->CH_MaxRdLat[Channel] = SubTotal;
	if (pDCTstat->GangedMode)
		pDCTstat->CH_MaxRdLat[1] = SubTotal;

	/* Program the F2x[1, 0]78[MaxRdLatency] register with
	 * the total delay value (in NCLKs).
	 */
	reg = 0x78 + reg_off;
	val = Get_NB32(dev, reg);
	/* unsigned constant: 0x3ff << 22 does not fit in a signed int */
	val &= ~(0x3ffu << 22);
	val |= (SubTotal & 0x3ffu) << 22;

	/* program MaxRdLatency to correspond with current delay */
	Set_NB32(dev, reg, val);
}
/*
 * Record the current receiver enable delay for every byte lane that is
 * flagged in DqsRcvEn_Pass but not yet flagged in DqsRcvEn_Saved.
 *
 * pDCTstat:  per-node state; DqsRcvEn_Saved and, on the first pass,
 *            CH_D_B_RCVRDLY[Channel][receiver >> 1] are updated.
 * rcvrEnDly: delay value to store for newly passing lanes.
 * Channel, receiver: select the destination row of CH_D_B_RCVRDLY.
 * Pass:      for SecondPass the pass mask is inverted and there is no
 *            destination array, so only the saved mask is updated.
 *
 * Returns whatever mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass) returns.
 */
static u8 mct_SavePassRcvEnDly_D(struct DCTStatStruc *pDCTstat,
					u8 rcvrEnDly, u8 Channel,
					u8 receiver, u8 Pass)
{
	u8 i;
	u8 mask_Saved, mask_Pass;
	u8 *p;

	/* calculate dimm offset
	 * not needed for CH_D_B_RCVRDLY array
	 */

	/* cmp if there has new DqsRcvEnDly to be recorded */
	mask_Pass = pDCTstat->DqsRcvEn_Pass;
	if (Pass == SecondPass)
		mask_Pass = ~mask_Pass;

	mask_Saved = pDCTstat->DqsRcvEn_Saved;
	if (mask_Pass != mask_Saved) {
		/* find desired stack offset according to channel/dimm/byte */
		if (Pass == SecondPass) {
			/* FIXME: SecondPass is never used for Barcelona
			 * p = pDCTstat->CH_D_B_RCVRDLY_1[Channel][receiver>>1]; */
			p = 0;	/* no destination array for this pass */
		} else {
			mask_Saved &= mask_Pass;
			p = pDCTstat->CH_D_B_RCVRDLY[Channel][receiver >> 1];
		}

		for (i = 0; i < 8; i++) {
			/* cmp per byte lane */
			if ((mask_Pass & (1 << i)) && !(mask_Saved & (1 << i))) {
				/* save RcvEnDly according to the related
				 * Dimm/byte lane; never store through the
				 * null pointer used for SecondPass */
				if (p)
					p[i] = (u8)rcvrEnDly;
				mask_Saved |= 1 << i;
			}
		}
		pDCTstat->DqsRcvEn_Saved = mask_Saved;
	}

	return mct_SaveRcvEnDly_D_1Pass(pDCTstat, Pass);
}
/*
 * Compare the first eight bytes read back from a test address against the
 * expected pattern and update the per-lane mask pDCTstat->DqsRcvEn_Pass.
 *
 * addr:    test address; used for SetUpperFSbase() and then shifted left
 *          by 8 to form the FS-relative offset.
 * channel: in 128-bit mode a non-zero channel compares the second
 *          quadword (offset 8) instead of the first.
 * pattern: on the first pass selects TestPattern1_D (1) or TestPattern0_D.
 * Pass:    any pass other than FirstPass compares against TestPattern2_D.
 *
 * Returns DQS_PASS or DQS_FAIL. On the first pass one matching lane is
 * enough to pass. On SecondPass the verdict is inverted before returning,
 * and it is forced when SB_DQSRcvLimit is set in Status.
 */
static u8 mct_CompareTestPatternQW0_D(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat,
					u32 addr, u8 channel,
					u8 pattern, u8 Pass)
{
	u8 *expected;
	u8 lane;
	u8 verdict;
	u8 readback;

	/* Select the reference pattern. */
	if (Pass != FirstPass)
		expected = (u8 *)TestPattern2_D;
	else if (pattern == 1)
		expected = (u8 *)TestPattern1_D;
	else
		expected = (u8 *)TestPattern0_D;

	SetUpperFSbase(addr);
	addr <<= 8;

	if ((pDCTstat->Status & (1 << SB_128bitmode)) && channel) {
		/* second channel */
		addr += 8;
		expected += 8;
	}

	print_debug_dqs_pair("\t\t\t\t\t\t test_buf = ", (u32)expected, " | addr_lo = ", addr, 4);

	/* One byte lane per iteration; only the low byte of each read is kept. */
	for (lane = 0; lane < 8; lane++, addr++) {
		readback = read32_fs(addr);
		print_debug_dqs_pair("\t\t\t\t\t\t\t\t ", expected[lane], " | ", readback, 4);
		if (readback != expected[lane])
			pDCTstat->DqsRcvEn_Pass &= ~(1 << lane);
		else
			pDCTstat->DqsRcvEn_Pass |= (1 << lane);
	}

	if (Pass == FirstPass) {
		/* first pass: at least one byte lane matching is a pass */
		verdict = (pDCTstat->DqsRcvEn_Pass != 0) ? DQS_PASS : DQS_FAIL;
	} else {
		/* otherwise: a single failing byte lane is a fail */
		verdict = (pDCTstat->DqsRcvEn_Pass != 0xFF) ? DQS_FAIL : DQS_PASS;
	}

	if (Pass == SecondPass) {
		/* If no fail was found before the limit was reached,
		 * force a fail so that the final delay gets saved.
		 */
		if (pDCTstat->Status & (1 << SB_DQSRcvLimit)) {
			verdict = DQS_FAIL;
			pDCTstat->DqsRcvEn_Pass = 0;
		}

		/* second pass reports the inverted result */
		verdict = (verdict == DQS_PASS) ? DQS_FAIL : DQS_PASS;
	}

	return verdict;
}
/*
 * Initialize the DQS positions of both channels in preparation for
 * receiver enable training by calling InitDQSPos4RcvrEn_D() for
 * channel 0 and then channel 1.
 */
static void mct_InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
					struct DCTStatStruc *pDCTstat)
{
	u8 Channel = 0;

	while (Channel < 2) {
		InitDQSPos4RcvrEn_D(pMCTstat, pDCTstat, Channel);
		Channel++;
	}
}
/*
 * Initialize the DQS positions of one channel in preparation for
 * receiver enable training.
 *
 * For each of four DIMM slots (index offset 0x100 per slot):
 *   - indices 1..3 (write data / write ECC timing) are written with 0,
 *   - indices 5..6 (read DQS timing control) are written with 0x2f2f2f2f,
 *   - index 7 (read DQS ECC timing control) is written with 0x0000002f.
 * All writes go through the indexed register at 0x98 + 0x100 * Channel.
 */
static void InitDQSPos4RcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 Channel)
{
	u32 dev = pDCTstat->dev_dct;
	u32 index_reg = 0x98 + 0x100 * Channel;
	u8 dimm_count = 4;	/* TODO: Rev C could be 4 */
	u8 idx, dimm;
	u32 timing;

	/* FIXME: add Cx support */

	/* DIMMx Write Data Timing Low/High and Write ECC Timing */
	timing = 0x00000000;
	for (idx = 1; idx <= 3; idx++) {
		for (dimm = 0; dimm < dimm_count; dimm++) {
			Set_NB32_index_wait(dev, index_reg, idx + 0x100 * dimm, timing);
		}
	}

	/* errata #180 */
	/* DIMMx Read DQS Timing Control Low/High */
	timing = 0x2f2f2f2f;
	for (idx = 5; idx <= 6; idx++) {
		for (dimm = 0; dimm < dimm_count; dimm++) {
			Set_NB32_index_wait(dev, index_reg, idx + 0x100 * dimm, timing);
		}
	}

	/* DIMMx Read DQS ECC Timing Control */
	timing = 0x0000002f;
	for (dimm = 0; dimm < dimm_count; dimm++) {
		Set_NB32_index_wait(dev, index_reg, 7 + 0x100 * dimm, timing);
	}
}
/*
 * Program the stored ECC-lane receiver enable delays of one channel.
 *
 * For each chip-select pair the value CH_D_BC_RCVRDLY[Channel][pair] is
 * written to the indexed register; the index starts at 0x12 and advances
 * by 3 per pair.
 */
void SetEccDQSRcvrEn_D(struct DCTStatStruc *pDCTstat, u8 Channel)
{
	u32 dev = pDCTstat->dev_dct;
	u32 index_reg = 0x98 + Channel * 0x100;
	u8 *ecc_dly = pDCTstat->CH_D_BC_RCVRDLY[Channel];
	u32 index = 0x12;
	u8 ChipSel = 0;
	u32 val;

	print_debug_dqs("\t\tSetEccDQSRcvrPos: Channel ", Channel, 2);

	while (ChipSel < MAX_CS_SUPPORTED) {
		val = ecc_dly[ChipSel >> 1];
		Set_NB32_index_wait(dev, index_reg, index, val);
		print_debug_dqs_pair("\t\tSetEccDQSRcvrPos: ChipSel ",
					ChipSel, " rcvr_delay ", val, 2);
		index += 3;
		ChipSel += 2;
	}
}
/*
 * Derive the ECC-lane receiver enable delay for each enabled chip-select
 * pair of a channel from two data byte lanes, store it in
 * CH_D_BC_RCVRDLY[Channel][pair], then program all pairs through
 * SetEccDQSRcvrEn_D().
 *
 * The two source lanes come from CH_EccDQSLike[Channel] (bits 2:0 and
 * bits 10:8); CH_EccDQSScale[Channel] weights the difference between them
 * when SB_Registered is clear in Status.
 */
static void CalcEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
struct DCTStatStruc *pDCTstat, u8 Channel)
{
u8 ChipSel;
u16 EccDQSLike;
u8 EccDQSScale;
u32 val, val0, val1;
EccDQSLike = pDCTstat->CH_EccDQSLike[Channel];
EccDQSScale = pDCTstat->CH_EccDQSScale[Channel];
for (ChipSel = 0; ChipSel < MAX_CS_SUPPORTED; ChipSel += 2) {
if(mct_RcvrRankEnabled_D(pMCTstat, pDCTstat, Channel, ChipSel)) {
u8 *p;
p = pDCTstat->CH_D_B_RCVRDLY[Channel][ChipSel>>1];
/* DQS Delay Value of Data Bytelane
* most like ECC byte lane */
val0 = p[EccDQSLike & 0x07];
/* DQS Delay Value of Data Bytelane
* 2nd most like ECC byte lane */
val1 = p[(EccDQSLike>>8) & 0x07];
if (!(pDCTstat->Status & (1 << SB_Registered))) {
/* Absolute difference between the two lanes. */
if(val0 > val1) {
val = val0 - val1;
} else {
val = val1 - val0;
}
/* NOTE(review): EccDQSScale is a u8, so ~EccDQSScale is promoted to int
* (upper bits all set) before the u32 multiply; confirm that an 8-bit
* complement was not intended here. */
val *= ~EccDQSScale;
val >>= 8; /* /256 */
/* NOTE(review): the val0 > val1 branch subtracts val1 while the other
* branch adds val0; the u32 result can wrap - verify the formula. */
if(val0 > val1) {
val -= val1;
} else {
val += val0;
}
} else {
/* Registered: val = 2 * val1 - val0 (unsigned arithmetic, may wrap). */
val = val1 - val0;
val += val1;
}
pDCTstat->CH_D_BC_RCVRDLY[Channel][ChipSel>>1] = val;
}
}
SetEccDQSRcvrEn_D(pDCTstat, Channel);
}
/*
 * Calculate and program the ECC-lane receiver enable delays for both
 * channels of every node.
 *
 * Iteration stops at the first node whose NodePresent is zero; nodes with
 * DCTSysLimit equal to zero are skipped.
 */
void mctSetEccDQSRcvrEn_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstatA)
{
	u8 Node;

	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
		struct DCTStatStruc *pDCTstat = pDCTstatA + Node;
		u8 dct;

		if (!pDCTstat->NodePresent)
			break;

		if (!pDCTstat->DCTSysLimit)
			continue;

		for (dct = 0; dct < 2; dct++)
			CalcEccDQSRcvrEn_D(pMCTstat, pDCTstat, dct);
	}
}
/*
 * Run phy fence training on DCT 0 and DCT 1 of every node whose
 * DCTSysLimit is non-zero. All MAX_NODES_SUPPORTED entries are visited.
 */
void phyAssistedMemFnceTraining(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstatA)
{
	u8 Node;

	/* FIXME: skip for Ax */
	for (Node = 0; Node < MAX_NODES_SUPPORTED; Node++) {
		struct DCTStatStruc *pDCTstat = pDCTstatA + Node;

		if (!pDCTstat->DCTSysLimit)
			continue;

		fenceDynTraining_D(pMCTstat, pDCTstat, 0);
		fenceDynTraining_D(pMCTstat, pDCTstat, 1);
	}
}
/*
 * Phy fence training for one DCT.
 *
 * Sequence, all through the indexed register at 0x98 + 0x100 * dct:
 *   1. seed the phase recovery registers 0x50..0x52 with
 *      FenceTrnFinDlySeed (four byte fields in 0x50/0x51, one in 0x52),
 *   2. set PhyFenceTrEn in index 0x08, wait, then clear it,
 *   3. read the nine 7-bit results back and average them, rounding up,
 *   4. subtract 8 for AMD_DR_Cx / AMD_DR_Bx parts and write the low five
 *      bits to bits 20:16 of index 0x0C,
 *   5. read index 0x04 and write the same value back.
 */
static void fenceDynTraining_D(struct MCTStatStruc *pMCTstat,
				struct DCTStatStruc *pDCTstat, u8 dct)
{
	u32 index_reg = 0x98 + 0x100 * dct;
	u32 dev = pDCTstat->dev_dct;
	u16 avRecValue = 0;
	u32 index;
	u32 val;
	u8 shift;

	/* BIOS first programs a seed value to the phase recovery engine
	 * (recommended 19) registers.
	 * Dram Phase Recovery Control Register (F2x[1,0]9C_x[51:50] and
	 * F2x[1,0]9C_x52.) .
	 */
	for (index = 0x50; index <= 0x52; index++) {
		val = (FenceTrnFinDlySeed & 0x1F);
		if (index != 0x52) {
			/* replicate the seed into all four byte fields */
			val |= val << 8 | val << 16 | val << 24;
		}
		Set_NB32_index_wait(dev, index_reg, index, val);
	}

	/* Set F2x[1,0]9C_x08[PhyFenceTrEn]=1. */
	val = Get_NB32_index_wait(dev, index_reg, 0x08);
	val |= 1 << PhyFenceTrEn;
	Set_NB32_index_wait(dev, index_reg, 0x08, val);

	/* Wait 200 MEMCLKs. */
	mct_Wait(50000);		/* wait 200us */

	/* Clear F2x[1,0]9C_x08[PhyFenceTrEn]=0. */
	val = Get_NB32_index_wait(dev, index_reg, 0x08);
	val &= ~(1 << PhyFenceTrEn);
	Set_NB32_index_wait(dev, index_reg, 0x08, val);

	/* BIOS reads the phase recovery engine registers
	 * F2x[1,0]9C_x[51:50] and F2x[1,0]9C_x52. */
	for (index = 0x50; index <= 0x52; index++) {
		val = Get_NB32_index_wait(dev, index_reg, index);
		if (index == 0x52) {
			/* only the lowest field is used in the last register */
			avRecValue += val & 0x7F;
			continue;
		}
		for (shift = 0; shift < 32; shift += 8)
			avRecValue += (val >> shift) & 0x7F;
	}

	/* average of the nine samples, rounded up */
	avRecValue = (avRecValue + 8) / 9;

	/* Write the (averaged value -8) to F2x[1,0]9C_x0C[PhyFence]. */
	/* inlined mct_AdjustFenceValue() */
	/* TODO: The RBC0 is not supported. */
	if ((pDCTstat->LogicalCPUID & AMD_DR_Cx) ||
	    (pDCTstat->LogicalCPUID & AMD_DR_Bx))
		avRecValue -= 8;

	val = Get_NB32_index_wait(dev, index_reg, 0x0C);
	val &= ~(0x1F << 16);
	val |= (avRecValue & 0x1F) << 16;
	Set_NB32_index_wait(dev, index_reg, 0x0C, val);

	/* Rewrite F2x[1,0]9C_x04-DRAM Address/Command Timing Control Register
	 * delays (both channels). */
	val = Get_NB32_index_wait(dev, index_reg, 0x04);
	Set_NB32_index_wait(dev, index_reg, 0x04, val);
}
/*
 * Busy-wait until the low 32 bits of MSR 0x10 (TSC) have advanced by at
 * least cycles * 8 counts since entry. The MSR is read at least once
 * after the starting value is captured.
 *
 * cycles: number of wait units; described in the original code as 50ns
 *         cycles, with each unit counted as 8 TSC ticks.
 */
void mct_Wait(u32 cycles)
{
	u32 msr = 0x10;		/* TSC */
	u32 lo, hi;
	u32 start;

	/* Wait # of 50ns cycles
	   This seems like a hack to me... */
	cycles <<= 3;		/* x8 (number of 1.25ns ticks) */

	_RDMSR(msr, &lo, &hi);
	start = lo;

	for (;;) {
		_RDMSR(msr, &lo, &hi);
		/* unsigned subtraction keeps working across a 32-bit wrap */
		if (lo - start >= cycles)
			break;
	}
}