blob: 8c8e66a807c031388eff3d318a1fd1a646408c1f [file] [log] [blame]
/*
* This file is part of the coreboot project.
*
* Copyright (C) 2012 secunet Security Networks AG
* (Written by Nico Huber <nico.huber@secunet.com> for secunet)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include <arch/io.h>
#include <pc80/mc146818rtc.h>
#include <console/console.h>
#include "gm45.h"
/* A small list of rank base addresses used as test targets. */
typedef struct {
	/* Physical base address of each populated rank under test. */
	u32 addr[RANKS_PER_CHANNEL];
	/* Number of valid entries in addr[]. */
	unsigned count;
} address_bunch_t;
/* Read Training. */
/* Per-channel, per-byte-lane read timing register: byte lane 0 sits at
   the highest offset, lane 7 at the lowest ((7 - bl) * 4). */
#define CxRDTy_MCHBAR(ch, bl) (0x14b0 + (ch * 0x0100) + ((7 - bl) * 4))
/* T: coarse read delay step (4-bit field). */
#define CxRDTy_T_SHIFT 20
#define CxRDTy_T_MASK (0xf << CxRDTy_T_SHIFT)
#define CxRDTy_T(t) ((t << CxRDTy_T_SHIFT) & CxRDTy_T_MASK)
/* P: fine read delay / phase step (3-bit field). */
#define CxRDTy_P_SHIFT 16
#define CxRDTy_P_MASK (0x7 << CxRDTy_P_SHIFT)
#define CxRDTy_P(p) ((p << CxRDTy_P_SHIFT) & CxRDTy_P_MASK)
/* Test pattern for read training.  read_training_test() checks one dword
   out of every 8-byte burst against this table (320 bytes per rank). */
static const u32 read_training_schedule[] = {
	0xfefefefe, 0x7f7f7f7f, 0xbebebebe, 0xdfdfdfdf,
	0xeeeeeeee, 0xf7f7f7f7, 0xfafafafa, 0xfdfdfdfd,
	0x00000000, 0x81818181, 0x40404040, 0x21212121,
	0x10101010, 0x09090909, 0x04040404, 0x03030303,
	0x10101010, 0x11111111, 0xeeeeeeee, 0xefefefef,
	0x10101010, 0x11111111, 0xeeeeeeee, 0xefefefef,
	0x10101010, 0xefefefef, 0x10101010, 0xefefefef,
	0x10101010, 0xefefefef, 0x10101010, 0xefefefef,
	0x00000000, 0xffffffff, 0x00000000, 0xffffffff,
	0x00000000, 0xffffffff, 0x00000000, 0x00000000,
};
/* Eight fine (p) steps per coarse (t) step. */
#define READ_TIMING_P_SHIFT 3
#define READ_TIMING_P_BOUND (1 << READ_TIMING_P_SHIFT)
/* Exclusive upper bound for the coarse delay t. */
#define READ_TIMING_T_BOUND 14
/* One read timing: coarse tap t plus fine phase p, p in [0, 8). */
typedef struct {
	int t;
	int p;
} read_timing_t;
/*
 * Canonicalize a read timing so that p lies in [0, READ_TIMING_P_BOUND),
 * carrying any phase overflow/underflow into whole t steps.  Dies if the
 * resulting t no longer fits the hardware field.
 */
static void normalize_read_timing(read_timing_t *const timing)
{
	for (; timing->p >= READ_TIMING_P_BOUND; timing->p -= READ_TIMING_P_BOUND)
		++timing->t;
	for (; timing->p < 0; timing->p += READ_TIMING_P_BOUND)
		--timing->t;

	if (timing->t < 0 || timing->t >= READ_TIMING_T_BOUND)
		die("Timing under-/overflow during read training.\n");
}
/*
 * Normalize the given timing and program its T/P fields into the
 * per-lane read timing register of the given channel.
 */
static void program_read_timing(const int ch, const int lane,
				read_timing_t *const timing)
{
	normalize_read_timing(timing);

	const u32 mchbar_offset = CxRDTy_MCHBAR(ch, lane);
	u32 value = MCHBAR32(mchbar_offset);
	value &= ~(CxRDTy_T_MASK | CxRDTy_P_MASK);
	value |= CxRDTy_T(timing->t) | CxRDTy_P(timing->p);
	MCHBAR32(mchbar_offset) = value;
}
/*
 * Verify the read-training pattern for one byte lane.
 *
 * Byte lanes 0-3 are carried in the low dword of each 8-byte burst
 * (lane_offset 0), lanes 4-7 in the high dword (lane_offset 4).
 *
 * Returns 1 on success, 0 on failure.
 */
static int read_training_test(const int channel, const int lane,
			      const address_bunch_t *const addresses)
{
	unsigned int i;

	const int lane_offset = lane & 4; /* dword select within burst */
	/* Unsigned constant: 0xff << 24 (lanes 3 and 7) would shift a
	   signed int past bit 30, which is undefined behavior. */
	const u32 lane_mask = 0xffu << ((lane & ~4) << 3);

	for (i = 0; i < addresses->count; ++i) {
		unsigned int offset;
		for (offset = lane_offset; offset < 320; offset += 8) {
			const u32 read = read32(addresses->addr[i] + offset);
			const u32 good = read_training_schedule[offset >> 3];
			if ((read & lane_mask) != (good & lane_mask))
				return 0;
		}
	}
	return 1;
}
/*
 * Train the read timing of one byte lane: find the lower and upper edges
 * of the working (t, p) window, then program the midpoint.
 */
static void read_training_per_lane(const int channel, const int lane,
				   const address_bunch_t *const addresses)
{
	read_timing_t lower, upper;

	/* NOTE(review): sets bits 25/26 of the per-lane register before
	   training — presumably enables the timing override; not visible
	   from this file, confirm against chipset docs. */
	MCHBAR32(CxRDTy_MCHBAR(channel, lane)) |= 3 << 25;

	/* Search lower bound. */
	lower.t = 0;
	lower.p = 0;
	program_read_timing(channel, lane, &lower);
	/* Coarse search for good t. */
	while (!read_training_test(channel, lane, addresses)) {
		++lower.t;
		program_read_timing(channel, lane, &lower);
	}
	/* Step back, then fine search for good p. */
	if (lower.t > 0) {
		--lower.t;
		program_read_timing(channel, lane, &lower);
		while (!read_training_test(channel, lane, addresses)) {
			++lower.p;
			program_read_timing(channel, lane, &lower);
		}
	}

	/* Search upper bound, starting one coarse step above the lower edge. */
	upper.t = lower.t + 1;
	upper.p = lower.p;
	program_read_timing(channel, lane, &upper);
	if (!read_training_test(channel, lane, addresses))
		die("Read training failed: limits too narrow.\n");
	/* Coarse search for bad t. */
	do {
		++upper.t;
		program_read_timing(channel, lane, &upper);
	} while (read_training_test(channel, lane, addresses));
	/* Fine search for bad p. */
	--upper.t;
	program_read_timing(channel, lane, &upper);
	while (read_training_test(channel, lane, addresses)) {
		++upper.p;
		program_read_timing(channel, lane, &upper);
	}

	/* Calculate and program mean value: flatten both bounds to a pure
	   phase count (t * 8 + p), average, and split back into t.p. */
	lower.p += lower.t << READ_TIMING_P_SHIFT;
	upper.p += upper.t << READ_TIMING_P_SHIFT;
	const int mean_p = (lower.p + upper.p) >> 1;
	/* lower becomes the mean value. */
	lower.t = mean_p >> READ_TIMING_P_SHIFT;
	lower.p = mean_p & (READ_TIMING_P_BOUND - 1);
	program_read_timing(channel, lane, &lower);
	printk(BIOS_DEBUG, "Final timings for byte lane %d on channel %d: "
			   "%d.%d\n", lane, channel, lower.t, lower.p);
}
/*
 * Run read training on every populated channel: write the test schedule
 * into all populated ranks, then train each of the eight byte lanes.
 */
static void perform_read_training(const dimminfo_t *const dimms)
{
	int ch, r, lane;

	FOR_EACH_POPULATED_CHANNEL(dimms, ch) {
		/* Collect the base address of every populated rank. */
		address_bunch_t addresses = { { 0, }, 0 };
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(dimms, ch, r)
			addresses.addr[addresses.count++] =
				raminit_get_rank_addr(ch, r);

		/* Seed each rank with the expected pattern. */
		unsigned int idx;
		for (idx = 0; idx < addresses.count; ++idx) {
			unsigned int off;
			for (off = 0; off < 320; off += 4)
				write32(addresses.addr[idx] + off,
					read_training_schedule[off >> 3]);
		}

		for (lane = 0; lane < 8; ++lane)
			read_training_per_lane(ch, lane, &addresses);
	}
}
/*
 * Save the trained read timings to CMOS for later S3 restore.
 * Each lane's timing pair is packed into one byte: T in the high
 * nibble, P in the low nibble.
 */
static void read_training_store_results(void)
{
	u8 bytes[TOTAL_CHANNELS * 8];
	int ch, i;

	FOR_EACH_CHANNEL(ch) {
		for (i = 0; i < 8; ++i) {
			const u32 reg = MCHBAR32(CxRDTy_MCHBAR(ch, i));
			const u8 t = (reg & CxRDTy_T_MASK) >> CxRDTy_T_SHIFT;
			const u8 p = (reg & CxRDTy_P_MASK) >> CxRDTy_P_SHIFT;
			bytes[(ch * 8) + i] = (t << 4) | p;
		}
	}

	/* Store everything in CMOS above 128 bytes. */
	for (i = 0; i < (TOTAL_CHANNELS * 8); ++i)
		cmos_write(bytes[i], CMOS_READ_TRAINING + i);
}
/*
 * Restore read timings previously saved to CMOS (S3 resume path).
 * Mirrors read_training_store_results(): T in the high nibble,
 * P in the low three bits of each stored byte.
 */
static void read_training_restore_results(void)
{
	u8 bytes[TOTAL_CHANNELS * 8];
	int ch, i;

	/* Fetch the stored bytes from CMOS. */
	for (i = 0; i < (TOTAL_CHANNELS * 8); ++i)
		bytes[i] = cmos_read(CMOS_READ_TRAINING + i);

	/* Program them back, lane by lane. */
	FOR_EACH_CHANNEL(ch) {
		for (i = 0; i < 8; ++i) {
			const u8 stored = bytes[(ch * 8) + i];
			const int t = stored >> 4;
			const int p = stored & 7;

			u32 reg = MCHBAR32(CxRDTy_MCHBAR(ch, i));
			reg &= ~(CxRDTy_T_MASK | CxRDTy_P_MASK);
			reg |= (3 << 25) | CxRDTy_T(t) | CxRDTy_P(p);
			MCHBAR32(CxRDTy_MCHBAR(ch, i)) = reg;
			printk(BIOS_DEBUG, "Restored timings for byte lane "
				"%d on channel %d: %d.%d\n", i, ch, t, p);
		}
	}
}
/*
 * Read-training entry point: train and save on a normal boot,
 * restore the saved results on S3 resume.
 */
void raminit_read_training(const dimminfo_t *const dimms, const int s3resume)
{
	if (s3resume) {
		read_training_restore_results();
	} else {
		perform_read_training(dimms);
		read_training_store_results();
	}
	raminit_reset_readwrite_pointers();
}
/* Write Training. */
/* All constants and macro operands below are unsigned: the original
   0xf << 28 (and t << 28) left-shifted a signed int into the sign bit,
   which is undefined behavior in C.  Masking the argument before the
   shift produces the same field value without the UB. */
#define CxWRTy_T_SHIFT		28
#define CxWRTy_T_MASK		(0xfu << CxWRTy_T_SHIFT)
#define CxWRTy_T(t)		(((t) & 0xfu) << CxWRTy_T_SHIFT)
#define CxWRTy_P_SHIFT		24
#define CxWRTy_P_MASK		(0x7u << CxWRTy_P_SHIFT)
#define CxWRTy_P(p)		(((p) & 0x7u) << CxWRTy_P_SHIFT)
#define CxWRTy_F_SHIFT		18
#define CxWRTy_F_MASK		(0x3u << CxWRTy_F_SHIFT)
#define CxWRTy_F(f)		(((f) & 0x3u) << CxWRTy_F_SHIFT)
#define CxWRTy_D_SHIFT		16
#define CxWRTy_D_MASK		(0x3u << CxWRTy_D_SHIFT)
#define CxWRTy_BELOW_D		(0x3u << CxWRTy_D_SHIFT)
#define CxWRTy_ABOVE_D		(0x1u << CxWRTy_D_SHIFT)
/* Test pattern for write training: 640 bytes, written and verified in
   8-byte bursts (both dwords of a burst carry the same table entry). */
static const u32 write_training_schedule[] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
};
/* Byte-lane masks for raw card types A, B and C, indexed as
   [MEM_CLOCK_1067MT?][group][lower/upper dword of the burst].
   Groups whose pair of masks is all-zero are skipped in training. */
static const u32 write_training_bytelane_masks_abc[2][4][2] = {
	{ /* clock < MEM_CLOCK_1067MT */
		{ 0xffffffff, 0x00000000 }, { 0x00000000, 0x00000000 },
		{ 0x00000000, 0xffffffff }, { 0x00000000, 0x00000000 },
	},
	{ /* clock == MEM_CLOCK_1067MT */
		{ 0x0000ffff, 0x00000000 }, { 0xffff0000, 0x00000000 },
		{ 0x00000000, 0x0000ffff }, { 0x00000000, 0xffff0000 },
	},
};
/* Byte-lane masks for raw card type F, indexed as
   [group][lower/upper dword of the burst]. */
static const u32 write_training_bytelane_masks_f[4][2] = {
	{ 0xff00ff00, 0x00000000 }, { 0x00ff00ff, 0x00000000 },
	{ 0x00000000, 0xff00ff00 }, { 0x00000000, 0x00ff00ff },
};
/* Eight fine (p) steps per coarse (t) step. */
#define WRITE_TIMING_P_SHIFT 3
#define WRITE_TIMING_P_BOUND (1 << WRITE_TIMING_P_SHIFT)
/* Exclusive upper bound for the coarsest step f. */
#define WRITE_TIMING_F_BOUND 4
/* One write timing: f.t.p, where t overflows into f (see
   normalize_write_timing()) and p overflows into t. */
typedef struct {
	int f;			/* coarsest step */
	int t;			/* coarse step, in [0, t_bound) */
	const int t_bound;	/* t steps per f step (11 or 12, clock-dependent) */
	int p;			/* fine phase, in [0, 8) */
} write_timing_t;
/*
 * Canonicalize a write timing: carry phase overflow/underflow into whole
 * t steps, then t overflow/underflow into whole f steps.  Dies if the
 * resulting f no longer fits the hardware field.
 */
static void normalize_write_timing(write_timing_t *const timing)
{
	for (; timing->p >= WRITE_TIMING_P_BOUND; timing->p -= WRITE_TIMING_P_BOUND)
		++timing->t;
	for (; timing->p < 0; timing->p += WRITE_TIMING_P_BOUND)
		--timing->t;

	for (; timing->t >= timing->t_bound; timing->t -= timing->t_bound)
		++timing->f;
	for (; timing->t < 0; timing->t += timing->t_bound)
		--timing->f;

	if (timing->f < 0 || timing->f >= WRITE_TIMING_F_BOUND)
		die("Timing under-/overflow during write training.\n");
}
/*
 * Normalize the given timing and program its F/T/P fields plus the
 * derived D field into the per-group write timing register.
 */
static void program_write_timing(const int ch, const int group,
				 write_timing_t *const timing, int memclk1067)
{
	/* t thresholds for the D field, indexed
	   [MEM_CLOCK_1067MT?][lower/upper]: t at or below the lower
	   threshold selects BELOW_D, above the upper selects ABOVE_D. */
	const u32 d_bounds[2][2] = { { 1, 6 }, { 2, 9 } };

	normalize_write_timing(timing);

	const int f = timing->f;
	const int t = timing->t;
	/* At 1067MT, clamp p to 4 within half a t step around the
	   t == 9/10 boundary. */
	const int p = (memclk1067 && (((t == 9) && (timing->p >= 4)) ||
				      ((t == 10) && (timing->p < 4))))
		? 4 : timing->p;
	const int d =
		(t <= d_bounds[memclk1067][0]) ? CxWRTy_BELOW_D :
		((t > d_bounds[memclk1067][1]) ? CxWRTy_ABOVE_D : 0);

	u32 reg = MCHBAR32(CxWRTy_MCHBAR(ch, group));
	reg &= ~(CxWRTy_T_MASK | CxWRTy_P_MASK | CxWRTy_F_MASK);
	reg &= ~CxWRTy_D_MASK;
	reg |= CxWRTy_T(t) | CxWRTy_P(p) | CxWRTy_F(f) | d;
	MCHBAR32(CxWRTy_MCHBAR(ch, group)) = reg;
}
/*
 * Run one pass of the write-training test: write the 640-byte schedule
 * to every address in the bunch, flush, and read it back through the
 * given lower/upper dword masks.
 *
 * NOTE(review): MCHBAR 0x0220/0x0218 are saved, tweaked for the test,
 * and restored on every exit path; their exact semantics are not
 * visible in this file.
 *
 * Returns 1 on success, 0 on failure.
 */
static int write_training_test(const address_bunch_t *const addresses,
			       const u32 *const masks)
{
	int ret = 0;
	unsigned int i;

	const u32 mmarb0 = MCHBAR32(0x0220);
	const u8 wrcctl = MCHBAR8(0x0218);
	/* Unsigned constant: 0xf << 28 would left-shift a signed int
	   into the sign bit, which is undefined behavior. */
	MCHBAR32(0x0220) |= 0xfu << 28;
	MCHBAR8(0x0218) |= 0x1 << 4;

	for (i = 0; i < addresses->count; ++i) {
		const unsigned int addr = addresses->addr[i];
		unsigned int off;

		for (off = 0; off < 640; off += 8) {
			const u32 pattern = write_training_schedule[off >> 3];
			write32(addr + off, pattern);
			write32(addr + off + 4, pattern);
		}

		/* NOTE(review): presumably flushes the posted writes —
		   confirm what MCHBAR 0x78 bit 0 does. */
		MCHBAR8(0x78) |= 1;

		for (off = 0; off < 640; off += 8) {
			const u32 good = write_training_schedule[off >> 3];
			const u32 read1 = read32(addr + off);
			if ((read1 & masks[0]) != (good & masks[0]))
				goto _bad_timing_out;
			const u32 read2 = read32(addr + off + 4);
			if ((read2 & masks[1]) != (good & masks[1]))
				goto _bad_timing_out;
		}
	}
	ret = 1;
_bad_timing_out:
	/* Restore the saved register state on both exit paths. */
	MCHBAR32(0x0220) = mmarb0;
	MCHBAR8(0x0218) = wrcctl;
	return ret;
}
/*
 * Train the write timing of one byte-lane group: find the lower and
 * upper edges of the working (f, t, p) window, then program the
 * midpoint.
 */
static void write_training_per_group(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067)
{
	const int t_bound = memclk1067 ? 12 : 11;
	write_timing_t lower = { 0, 0, t_bound, 0 },
		       upper = { 0, 0, t_bound, 0 };

	/* Search lower bound, seeded from the current register contents.
	   NOTE(review): the seed is read from bit positions 12/8/2,
	   different from the 28/24/18 fields program_write_timing()
	   writes — presumably a second view of the same timings; confirm
	   against chipset docs. */
	const u32 reg = MCHBAR32(CxWRTy_MCHBAR(ch, group));
	lower.t = (reg >> 12) & 0xf;
	lower.p = (reg >> 8) & 0x7;
	lower.f = ((reg >> 2) & 0x3) - 1;
	program_write_timing(ch, group, &lower, memclk1067);
	/* Coarse search for good t. */
	while (!write_training_test(addresses, masks[group])) {
		++lower.t;
		program_write_timing(ch, group, &lower, memclk1067);
	}
	/* Fine search for good p. */
	--lower.t;
	program_write_timing(ch, group, &lower, memclk1067);
	while (!write_training_test(addresses, masks[group])) {
		++lower.p;
		program_write_timing(ch, group, &lower, memclk1067);
	}
	/* Search upper bound, starting three coarse steps up. */
	upper.t = lower.t + 3;
	upper.p = lower.p;
	upper.f = lower.f;
	program_write_timing(ch, group, &upper, memclk1067);
	if (!write_training_test(addresses, masks[group]))
		die("Write training failed; limits too narrow.\n");
	/* Coarse search for bad t (first failing step above the window). */
	while (write_training_test(addresses, masks[group])) {
		++upper.t;
		program_write_timing(ch, group, &upper, memclk1067);
	}
	/* Fine search for bad p. */
	--upper.t;
	program_write_timing(ch, group, &upper, memclk1067);
	while (write_training_test(addresses, masks[group])) {
		++upper.p;
		program_write_timing(ch, group, &upper, memclk1067);
	}
	/* Calculate and program mean value: flatten both bounds to a
	   single phase count (((f * t_bound) + t) * 8 + p), average,
	   then split back into f.t.p. */
	lower.t += lower.f * lower.t_bound;
	lower.p += lower.t << WRITE_TIMING_P_SHIFT;
	upper.t += upper.f * upper.t_bound;
	upper.p += upper.t << WRITE_TIMING_P_SHIFT;
	/* lower becomes the mean value. */
	const int mean_p = (lower.p + upper.p) >> 1;
	lower.f = mean_p / (lower.t_bound << WRITE_TIMING_P_SHIFT);
	lower.t = (mean_p >> WRITE_TIMING_P_SHIFT) % lower.t_bound;
	lower.p = mean_p & (WRITE_TIMING_P_BOUND - 1);
	program_write_timing(ch, group, &lower, memclk1067);
	printk(BIOS_DEBUG, "Final timings for group %d"
			   " on channel %d: %d.%d.%d\n",
	       group, ch, lower.f, lower.t, lower.p);
}
/*
 * Run write training on all populated channels.  Raw card type F uses a
 * different byte-lane mask table than types A/B/C.
 */
static void perform_write_training(const int memclk1067,
				   const dimminfo_t *const dimms)
{
	const int cardF[] = { dimms[0].card_type == 0xf,
			      dimms[1].card_type == 0xf };
	int ch, r, group;
	address_bunch_t addr[2] = { { { 0, }, 0 }, { { 0, }, 0 }, };

	/* Add check if channel A is populated, i.e. if cardF[0] is valid.
	 * Otherwise we would write channel A registers when DIMM in channel B
	 * is of raw card type A, B or C (cardF[1] == 0) even if channel A is
	 * not populated.
	 * Needs raw card type A, B or C for testing. */
	if ((dimms[0].card_type != 0) && (cardF[0] == cardF[1])) {
		/* Common path for both channels. */
		FOR_EACH_POPULATED_RANK(dimms, ch, r)
			addr[0].addr[addr[0].count++] =
				raminit_get_rank_addr(ch, r);
	} else {
		/* Separate address lists per channel. */
		FOR_EACH_POPULATED_RANK(dimms, ch, r)
			addr[ch].addr[addr[ch].count++] =
				raminit_get_rank_addr(ch, r);
	}
	FOR_EACH_CHANNEL(ch) if (addr[ch].count > 0) {
		const u32 (*const masks)[2] = (!cardF[ch])
			? write_training_bytelane_masks_abc[memclk1067]
			: write_training_bytelane_masks_f;
		for (group = 0; group < 4; ++group) {
			/* Skip groups with no byte lanes to test. */
			if (!masks[group][0] && !masks[group][1])
				continue;
			write_training_per_group(
				ch, group, &addr[ch], masks, memclk1067);
		}
	}
}
/*
 * Save the trained write timings to CMOS for later S3 restore.
 * Each group takes two bytes: the first packs T (high nibble) and
 * P (low nibble), the second holds F.
 */
static void write_training_store_results(void)
{
	u8 bytes[TOTAL_CHANNELS * 4 * 2]; /* two bytes per group */
	int ch, i;

	/* We could save six bytes by putting all F values in two bytes. */
	FOR_EACH_CHANNEL(ch) {
		for (i = 0; i < 4; ++i) {
			const u32 reg = MCHBAR32(CxWRTy_MCHBAR(ch, i));
			const u8 t = (reg & CxWRTy_T_MASK) >> CxWRTy_T_SHIFT;
			const u8 p = (reg & CxWRTy_P_MASK) >> CxWRTy_P_SHIFT;
			const u8 f = (reg & CxWRTy_F_MASK) >> CxWRTy_F_SHIFT;
			bytes[(ch * 8) + (i * 2)] = (t << 4) | p;
			bytes[(ch * 8) + (i * 2) + 1] = f;
		}
	}

	/* Store everything in CMOS above 128 bytes. */
	for (i = 0; i < (TOTAL_CHANNELS * 4 * 2); ++i)
		cmos_write(bytes[i], CMOS_WRITE_TRAINING + i);
}
/*
 * Restore write timings previously saved to CMOS (S3 resume path),
 * reprogramming them through the regular program_write_timing() path.
 */
static void write_training_restore_results(const int memclk1067)
{
	const int t_bound = memclk1067 ? 12 : 11;
	u8 bytes[TOTAL_CHANNELS * 4 * 2]; /* two bytes per group */
	int ch, i;

	/* Fetch the stored bytes from CMOS. */
	for (i = 0; i < (TOTAL_CHANNELS * 4 * 2); ++i)
		bytes[i] = cmos_read(CMOS_WRITE_TRAINING + i);

	FOR_EACH_CHANNEL(ch) {
		for (i = 0; i < 4; ++i) {
			const u8 tp = bytes[(ch * 8) + (i * 2)];
			write_timing_t timing = { 0, 0, t_bound, 0 };

			timing.f = bytes[(ch * 8) + (i * 2) + 1] & 3;
			timing.t = tp >> 4;
			timing.p = tp & 7;
			program_write_timing(ch, i, &timing, memclk1067);
			printk(BIOS_DEBUG, "Restored timings for group %d "
				"on channel %d: %d.%d.%d\n",
				i, ch, timing.f, timing.t, timing.p);
		}
	}
}
/*
 * Write-training entry point: train and save on a normal boot,
 * restore the saved results on S3 resume.
 */
void raminit_write_training(const mem_clock_t ddr3clock,
			    const dimminfo_t *const dimms,
			    const int s3resume)
{
	const int memclk1067 = (ddr3clock == MEM_CLOCK_1067MT);

	if (s3resume) {
		write_training_restore_results(memclk1067);
	} else {
		perform_write_training(memclk1067, dimms);
		write_training_store_results();
	}
	raminit_reset_readwrite_pointers();
}