首先我们看sd_revalidate_disk(),这个函数很重要,一定程度上来说,正是这个函数从硬件和软件两个方面掀起了我们了解scsi磁盘的性高潮.这个函数它不是一个函数在战斗,它完全是贾宝玉林黛玉方世玉附体,由这一个函数可以牵连出N个函数.而这N个函数中的一些函数本身又有好几百行,所以我们算是陷进去了.

   1496 /**

   1497  *      sd_revalidate_disk - called the first time a new disk is seen,

   1498  *      performs disk spin up, read_capacity, etc.

   1499  *      @disk: struct gendisk we care about

   1500  **/

   1501 static int sd_revalidate_disk(struct gendisk *disk)

   1502 {

   1503         struct scsi_disk *sdkp = scsi_disk(disk);

   1504         struct scsi_device *sdp = sdkp->device;

   1505         unsigned char *buffer;

   1506         unsigned ordered;

   1507

   1508         SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,

   1509                                       "sd_revalidate_disk\n"));

   1510

   1511         /*

   1512          * If the device is offline, don't try and read capacity or any

   1513          * of the other niceties.

   1514          */

   1515         if (!scsi_device_online(sdp))

   1516                 goto out;

   1517

   1518         buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL | __GFP_DMA);

   1519         if (!buffer) {

   1520                 sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory "

   1521                           "allocation failure.\n");

   1522                 goto out;

   1523         }

   1524

   1525         /* defaults, until the device tells us otherwise */

   1526         sdp->sector_size = 512;

   1527         sdkp->capacity = 0;

   1528         sdkp->media_present = 1;

   1529         sdkp->write_prot = 0;

   1530         sdkp->WCE = 0;

   1531         sdkp->RCD = 0;

   1532

   1533         sd_spinup_disk(sdkp);

   1534

   1535         /*

   1536          * Without media there is no reason to ask; moreover, some devices

   1537          * react badly if we do.

   1538          */

   1539         if (sdkp->media_present) {

   1540                 sd_read_capacity(sdkp, buffer);

   1541                 sd_read_write_protect_flag(sdkp, buffer);

   1542                 sd_read_cache_type(sdkp, buffer);

   1543         }

   1544

   1545         /*

   1546          * We now have all cache related info, determine how we deal

   1547          * with ordered requests.  Note that as the current SCSI

   1548          * dispatch function can alter request order, we cannot use

   1549          * QUEUE_ORDERED_TAG_* even when ordered tag is supported.

   1550          */

   1551         if (sdkp->WCE)

   1552                 ordered = sdkp->DPOFUA

   1553                         ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH;

   1554         else

   1555                 ordered = QUEUE_ORDERED_DRAIN;

   1556

   1557         blk_queue_ordered(sdkp->disk->queue, ordered, sd_prepare_flush);

   1558

   1559         set_capacity(disk, sdkp->capacity);

   1560         kfree(buffer);

   1561

   1562  out:

   1563         return 0;

   1564 }

用我们经常用错的一个成语来说,就是首当其冲的函数便是sd_spinup_disk().

   1005 /*

   1006  * spinup disk - called only in sd_revalidate_disk()

   1007  */

   1008 static void

   1009 sd_spinup_disk(struct scsi_disk *sdkp)

   1010 {

   1011         unsigned char cmd[10];

   1012         unsigned long spintime_expire = 0;

   1013         int retries, spintime;

   1014         unsigned int the_result;

   1015         struct scsi_sense_hdr sshdr;

   1016         int sense_valid = 0;

   1017

   1018         spintime = 0;

   1019

   1020         /* Spin up drives, as required.  Only do this at boot time */

   1021         /* Spinup needs to be done for module loads too. */

   1022         do {

   1023                 retries = 0;

   1024

   1025                 do {

   1026                         cmd[0] = TEST_UNIT_READY;

   1027                         memset((void *) &cmd[1], 0, 9);

   1028

   1029                         the_result = scsi_execute_req(sdkp->device, cmd,

   1030                                                       DMA_NONE, NULL, 0,

   1031                                                       &sshdr, SD_TIMEOUT,

   1032                                                       SD_MAX_RETRIES);

   1033

   1034                         /*

   1035                          * If the drive has indicated to us that it

   1036                          * doesn't have any media in it, don't bother

   1037                          * with any more polling.

   1038                          */

   1039                         if (media_not_present(sdkp, &sshdr))

   1040                                 return;

   1041

   1042                         if (the_result)

   1043                                 sense_valid = scsi_sense_valid(&sshdr);

   1044                         retries++;

   1045                 } while (retries < 3 &&

   1046                          (!scsi_status_is_good(the_result) ||

   1047                           ((driver_byte(the_result) & DRIVER_SENSE) &&

   1048                           sense_valid && sshdr.sense_key == UNIT_ATTENTION)));

   1049

   1050                 if ((driver_byte(the_result) & DRIVER_SENSE) == 0) {

   1051                         /* no sense, TUR either succeeded or failed

   1052                          * with a status error */

   1053                         if(!spintime && !scsi_status_is_good(the_result)) {

   1054                                 sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n");

   1055                                 sd_print_result(sdkp, the_result);

   1056                         }

   1057                         break;

   1058                 }

   1059

   1060                 /*

   1061                  * The device does not want the automatic start to be issued.

   1062                  */

   1063                 if (sdkp->device->no_start_on_add) {

   1064                         break;

   1065                 }

   1066

   1067                 /*

   1068                  * If manual intervention is required, or this is an

   1069                  * absent USB storage device, a spinup is meaningless.

   1070                  */

   1071                 if (sense_valid &&

   1072                     sshdr.sense_key == NOT_READY &&

   1073                     sshdr.asc == 4 && sshdr.ascq == 3) {

   1074                         break;          /* manual intervention required */

   1075

   1076                 /*

   1077                  * Issue command to spin up drive when not ready

   1078                  */

   1079                 } else if (sense_valid && sshdr.sense_key == NOT_READY) {

   1080                         if (!spintime) {

   1081                                 sd_printk(KERN_NOTICE, sdkp, "Spinning up disk...");

   1082                                 cmd[0] = START_STOP;

   1083                                 cmd[1] = 1;     /* Return immediately */

   1084                                 memset((void *) &cmd[2], 0, 8);

   1085                                 cmd[4] = 1;     /* Start spin cycle */

   1086                                 scsi_execute_req(sdkp->device, cmd, DMA_NONE,

   1087                                                  NULL, 0, &sshdr,

   1088                                                  SD_TIMEOUT, SD_MAX_RETRIES);

   1089                                 spintime_expire = jiffies + 100 * HZ;

   1090                                 spintime = 1;

   1091                         }

   1092                         /* Wait 1 second for next try */

   1093                         msleep(1000);

   1094                         printk(".");

   1095

   1096                 /*

   1097                  * Wait for USB flash devices with slow firmware.

   1098                  * Yes, this sense key/ASC combination shouldn't

   1099                  * occur here.  It's characteristic of these devices.

   1100                  */

   1101                 } else if (sense_valid &&

   1102                                 sshdr.sense_key == UNIT_ATTENTION &&

   1103                                 sshdr.asc == 0x28) {

   1104                         if (!spintime) {

   1105                                 spintime_expire = jiffies + 5 * HZ;

   1106                                 spintime = 1;

   1107                         }

   1108                         /* Wait 1 second for next try */

   1109                         msleep(1000);

   1110                 } else {

   1111                         /* we don't understand the sense code, so it's

   1112                          * probably pointless to loop */

   1113                         if(!spintime) {

   1114                                 sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n");

   1115                                 sd_print_sense_hdr(sdkp, &sshdr);

   1116                         }

   1117                         break;

   1118                 }

   1119

   1120         } while (spintime && time_before_eq(jiffies, spintime_expire));

   1121

   1122         if (spintime) {

   1123                 if (scsi_status_is_good(the_result))

   1124                         printk("ready\n");

   1125                 else

   1126                         printk("not responding...\n");

   1127         }

   1128 }

顾名思义,spinup_disk就是让磁盘转起来.然而,要看明白这个函数,你就不得不对SCSI spec有一定了解了.

这个函数虽然复杂,但是我们本着擒贼先擒王的思想,重点关注这个函数中最有价值的那行代码,没错,即使是曲阳路易买得超市门口看自行车的大妈都知道,这个函数中最有价值的那行代码一定是1029,scsi_execute_req()函数的调用.这个函数算是scsi核心层提供的,咱们只管调用不用管实现.我们在include/scsi/scsi_device.h中能找到它的声明:

    297 extern int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,

    298   int data_direction, void *buffer, unsigned bufflen,

    299   struct scsi_sense_hdr *, int timeout, int retries);

和usb核心层一样,scsi核心层也提供了大量的函数让我们调用,这些函数极大的便利了我们编写scsi设备驱动程序.我们只要准备好参数传递给这个函数,然后就万事大吉了,等着判断函数返回值就是了,至于需要传递的数据,则已经被填充在我们的参数中的buffer里边了.这就好比我每天上班的时候把自行车停在西直门城铁站外,到了晚上下班回来的时候,自行车框里自然而然的就被填充满了,什么都有,香烟盒,卫生纸,吃剩的苹果,嚼过的口香糖,偶尔还有用过的避孕套,总而言之,首都人民的热情一次次的让我感动得泪流满面,让我觉得北漂的日子并不孤独.

这个函数说白了就是执行一个scsi命令,其第一个参数不必多说,就是我们的struct scsi_device的结构体指针,咱们这个故事里就这么一个.第二个参数则是代表着命令,cmd,就是command.其实每一个参数的意思都很明了.

咱们结合我们的代码来看我们具体传递了怎样的参数.第一个sdkp->device这没得说,第二个,cmd,咱们在1011行申请的一个unsigned char类型的数组,总共10个元素,1026行给赋了值为TEST_UNIT_READY.Test Unit Ready就是一个很基本的SCSI命令.DMA_NONE代表传输方向,buffer和bufflen咱们用不上,因为这个命令就是测试设备准备好了没有,不需要传递什么数据.

所以正常来讲,咱们这么一调用scsi_execute_req()以执行这个Test Unit Ready命令,返回的结果基本上都是好的,除非设备真的有毛病.

当然你要说有没有出错的时候,那当然也是有的.比如下面这个例子,

[root@localhost dev]# ls sd*

sda  sda1  sda10  sda11  sda12  sda13  sda14  sda2  sda3  sda5  sda6  sda7  sda8  sda9  sdb  sdc sdd  sde  sdf

[root@localhost ~]# sg_turs /dev/sda

Completed 1 Test Unit Ready commands with 0 errors

[root@localhost ~]# sg_turs /dev/sdb

Completed 1 Test Unit Ready commands with 0 errors

[root@localhost ~]# sg_turs /dev/sdc

Completed 1 Test Unit Ready commands with 0 errors

[root@localhost ~]# sg_turs /dev/sde

Completed 1 Test Unit Ready commands with 0 errors

[root@localhost ~]# sg_turs /dev/sdf

test unit ready:  Fixed format, current;  Sense key: Not Ready

 Additional sense: Medium not present

Completed 1 Test Unit Ready commands with 1 errors

这里sg_turs这个命令就是用来手工发送Test Unit Ready用的.不过要使用这个命令,你得安装sg3_utils系列软件包.

[root@localhost dev]# rpm -qa | grep sg3_utils

sg3_utils-devel-1.20-2.1

sg3_utils-1.20-2.1

sg3_utils-libs-1.20-2.1

我们看到在我测试的五块硬盘中,前四块(sda,sdb,sdc,sde)都没有问题,但是最后一块(sdf)就报错了.所以在执行完命令之后,我们用the_result记录下结果,并且在1046行调用scsi_status_is_good()来判断结果.关于scsi_status_is_good()以及和它相关的一些宏定义于include/scsi/scsi.h文件中:

    125 /*

    126  *  SCSI Architecture Model (SAM) Status codes. Taken from SAM-3 draft

    127  *  T10/1561-D Revision 4 Draft dated 7th November 2002.

    128  */

    129 #define SAM_STAT_GOOD            0x00

    130 #define SAM_STAT_CHECK_CONDITION 0x02

    131 #define SAM_STAT_CONDITION_MET   0x04

    132 #define SAM_STAT_BUSY            0x08

    133 #define SAM_STAT_INTERMEDIATE    0x10

    134 #define SAM_STAT_INTERMEDIATE_CONDITION_MET 0x14

    135 #define SAM_STAT_RESERVATION_CONFLICT 0x18

    136 #define SAM_STAT_COMMAND_TERMINATED 0x22        /* obsolete in SAM-3 */

    137 #define SAM_STAT_TASK_SET_FULL   0x28

    138 #define SAM_STAT_ACA_ACTIVE      0x30

    139 #define SAM_STAT_TASK_ABORTED    0x40

    140

    141 /** scsi_status_is_good - check the status return.

    142  *

    143  * @status: the status passed up from the driver (including host and

    144  *          driver components)

    145  *

    146  * This returns true for known good conditions that may be treated as

    147  * command completed normally

    148  */

    149 static inline int scsi_status_is_good(int status)

    150 {

    151         /*

    152          * FIXME: bit0 is listed as reserved in SCSI-2, but is

    153          * significant in SCSI-3.  For now, we follow the SCSI-2

    154          * behaviour and ignore reserved bits.

    155          */

    156         status &= 0xfe;

    157         return ((status == SAM_STAT_GOOD) ||

    158                 (status == SAM_STAT_INTERMEDIATE) ||

    159                 (status == SAM_STAT_INTERMEDIATE_CONDITION_MET) ||

    160                 /* FIXME: this is obsolete in SAM-3 */

    161                 (status == SAM_STAT_COMMAND_TERMINATED));

    162 }

上面的那些宏被称为状态码, scsi_execute_req()的返回值就是这些状态码中的一个.而其中可以被认为是good的状态就是scsi_status_is_good函数中列出来的这四种,当然理论上来说最理想的就是SAM_STAT_GOOD,而另外这几种也勉强算是可以接受,将就将就的让它过去.

不过有一点必须明白的是,the_result和状态码还是有区别的,毕竟状态码只有那么多,8位来表示足矣,the_result我们看到是unsigned int,显然它不只是8位,于是我们就充分利用资源,因此就有了下面这些宏,

    358 /*

    359  *  Use these to separate status msg and our bytes

    360  *

    361  *  These are set by:

    362  *

    363  *      status byte = set from target device

    364  *      msg_byte    = return status from host adapter itself.

    365  *      host_byte   = set by low-level driver to indicate status.

    366  *      driver_byte = set by mid-level.

    367  */

    368 #define status_byte(result) (((result) >> 1) & 0x7f)

    369 #define msg_byte(result)    (((result) >> 8) & 0xff)

    370 #define host_byte(result)   (((result) >> 16) & 0xff)

    371 #define driver_byte(result) (((result) >> 24) & 0xff)

    372 #define suggestion(result)  (driver_byte(result) & SUGGEST_MASK)

也就是说除了最低的那个byte是作为status byte,剩下的byte我们也没浪费,它们都被用来承载信息,其中driver_byte,即bit24到bit31,这8位被用来承载mid-level设置的信息.而这里用它和DRIVER_SENSE相与,则判断的是是否有sense data,我们当初在usb-storage故事中就说过,scsi世界里的sense data就是错误信息.这里1025行至1048行的这个do-while循环就是如果不成功就最多重复三次,循环结束了之后,1050行再次判断有没有sense data,如果没有,则说明也许成功了.

Scsi子系统最无耻的地方就在于错误判断的代码特别的多.而针对sense data的处理则是错误判断的一部分.

      8 /*

      9  * This is a slightly modified SCSI sense "descriptor" format header.

     10  * The addition is to allow the 0x70 and 0x71 response codes. The idea

     11  * is to place the salient data from either "fixed" or "descriptor" sense

     12  * format into one structure to ease application processing.

     13  *

     14  * The original sense buffer should be kept around for those cases

     15  * in which more information is required (e.g. the LBA of a MEDIUM ERROR).

     16  */

     17 struct scsi_sense_hdr {         /* See SPC-3 section 4.5 */

     18         u8 response_code;       /* permit: 0x0, 0x70, 0x71, 0x72, 0x73 */

     19         u8 sense_key;

     20         u8 asc;

     21         u8 ascq;

     22         u8 byte4;

     23         u8 byte5;

     24         u8 byte6;

     25         u8 additional_length;   /* always 0 for fixed sense format */

     26 };

     27

     28 static inline int scsi_sense_valid(struct scsi_sense_hdr *sshdr)

     29 {

     30         if (!sshdr)

     31                 return 0;

     32

     33         return (sshdr->response_code & 0x70) == 0x70;

     34 }

这里定义的struct scsi_sense_hdr就是被用来描述一个sense data.“hdr”就是header的意思,因为sense data可能长度比较长,但是其前8个bytes是最重要的,所以这部分被叫做header,或者说头部,大多数情况下只要理睬头部就够了.

我们看函数scsi_execute_req()中第六个参数是struct scsi_sense_hdr *sshdr,换言之,如果命令执行出错了,那么sense data就会通过这个参数返回.所以咱们定义了sshdr,然后咱们通过判断它和它的各个成员,来决定下一步.

sense data中,最基本的一个元素叫做response_code,它相当于为一个sense data定了性,即它属于哪一个类别,因为sense data毕竟有很多种.response code总共就是8bits,目前使用的值只有70h,71h,72h,73h,其它的像00h到6Fh以及74h到7Eh这些都是保留的,以备将来之用.所以这里判断的就是response code得是0x70,0x71,0x72,0x73才是valid,否则就是invalid(实际代码只检查response_code & 0x70是否等于0x70,并没有逐个比较这四个值).这就是scsi_sense_valid()做的事情.

关于sense data,事实上,坊间一直流传着一本叫做SCSI Primary Commands(SPC)的秘籍,在这本秘籍的第四章,确切的说是4.5节,名字就叫做Sense data,即这一节是专门介绍Sense Data的.Sense data中最有意义的东西叫做sense key和sense code.这两个概念基本上确定了你这个错误究竟是什么错误.

1048行,我们判断sshdr的sense_key是不是等于UNIT_ATTENTION,这个信息表示这个设备可能被重置了或者可移动的介质发生了变化,或者更通俗一点说,只要设备发生了一些变化,然后它希望引起主机控制器的关注,比如说设备原本是on-line,突然变成了off-line,或者反过来,设备从off-line回到了on-line.在正式读写设备之前,如果有UNIT_ATTENTION条件,必须把它给清除掉.而这(清除UNIT ATTENTION)也正是Test Unit Ready的工作之一.

而如果sense key等于NOT_READY,则表明这个logical unit不能被访问.(NOT READY: Indicates that the logical unit is not accessible.)而如果sense key等于NOT READY,且asc等于04h,ascq等于03h,这表明“Logical Unit Not Ready, Manual Intervention required”.(详见SPC-4,附录D部分)这说明需要人工干预.

当然大多数情况下,应该执行的是1079行这个else if所包含的代码.即磁盘确实应该是NOT_READY,于是我们需要发送下一个命令,START STOP,在另一部江湖武功秘籍名为SCSI Block Commands-2(SBC-2)的书中,5.17节专门介绍了START STOP UNIT这个命令.这个命令简而言之,就相当于电源开关,SBC-2的Table 48给出了这个命令的格式:

结合代码看,咱们把cmd[4]设置为1,实际上就等于是把这张图里的START位(即命令第4字节的bit 0)设置为1;而cmd[1]设置为1,对应的是第1字节bit 0的IMMED位,表示命令立即返回.而在SBC-2中,这个START位的含义如下:

If the START bit is set to zero, then the logical unit shall transition to the stopped power condition, disable the idle condition timer if it is active (see SPC-3), and disable the standby condition timer if it is active (see SPC-3). If the START bit set to one, then the logical unit shall transition to the active power condition, enable the idle condition timer if it is active, and enable the standby condition timer if it is active.

很明显,这就是真正的电源开关.因此,1086行再次调用scsi_execute_req以执行START STOP UNIT命令,就是真正的让硬盘转起来.或者用郭富城的话说,动起来!

于是我们就很清楚从1022行直到1120行这一百行代码的do-while循环的意思了.其理想情况的流程就是:

1.      软件说:磁盘磁盘我问你,你准备好了没有?

2.      磁盘说:没有!

3.      软件说:磁盘磁盘你听着,你快给我转起来!

4.      软件:睡眠1000毫秒之后重复第一步的问题.(但磁盘这次可能走第二步,也可能走第五步.)

5.      磁盘说:是的,我准备好了,我们时刻准备着.

6.      这时,1057行的break语句会被执行,从而循环结束.sd_spinup_disk()函数也就结束了它的使命.

7.      在第一次走到第四步的时候,会设置spintime_expire为100秒之后(jiffies + 100 * HZ),即这个时间为软件忍耐极限,磁盘你只要在100秒之内给我动起来,我就既往不咎,倘若给你100秒你还敬酒不吃吃罚酒,那就没办法了,while循环自然结束,1126行这个printk语句执行,告诉上级说,not responding,换言之,这厮没救了,整个一扶不起的阿斗.


原文见:http://blog.csdn.net/fudan_abc/article/details/1922643

GitHub 加速计划 / li / linux-dash
10.39 K
1.2 K
下载
A beautiful web dashboard for Linux
最近提交(Master分支:2 个月前 )
186a802e added ecosystem file for PM2 4 年前
5def40a3 Add host customization support for the NodeJS version 4 年前
Logo

旨在为数千万中国开发者提供一个无缝且高效的云端环境,以支持学习、使用和贡献开源项目。

更多推荐