典型的基于tcpip协议套接字方式的网络通信模块层次:

socket api

|

|

v

WS2_32.dll

socket irp

|

|

v

Afd.sys

tdi irp

|

|

v

Tcpip.sys

回调函数接口

|

|

v

Ndis中间层过滤驱动

回调函数接口

|

|

v

小端口驱动

中断交互操作

网卡

应用程序调用WS2_32.dll中的socket api,socket api在内部生成socket irp发给afd.sys这个中间辅助驱动层,afd.sys将socket irp转换成tdi irp发给tcpip协议驱动,协议驱动通过注册的回调函数与小端口驱动交互(中间可能穿插N个中间层过滤驱动),小端口驱动最终通过中断与网卡交互,操作硬件。

其中,协议驱动、中间层驱动、小端口驱动三者之间的交互是通过ndis.sys这个库函数模块实现的,或者说ndis.sys提供了ndis框架,协议驱动、中间层驱动、小端口驱动三者都得遵循这个框架。

为什么网络通信需要这么复杂的分层?答案是为了减轻开发、维护、管理工作的负担,分层能够提供最大的灵活性。各层的设计人员只需专注自身模块的设计工作,无需担心其他模块是怎么实现的,只需保持接口一致即可。

如应用程序可以调用socket api就可以实现网络通信,而不管底层是如何实现的。使用socket api还可以使得windows上能兼容运行Unix系统上的网络通信程序,ws2_32.dll这个模块中实现了socket接口。

Afd.sys实际上是一个适配层,他可以适配N种协议驱动。

Tcpip.sys是一种协议驱动(其实是一个协议栈驱动),它内部实现了一套协议栈,决定了如何解析从网卡接收到的包,以及以什么格式将应用程序数据发到网卡。只不过tcpip.sys将收到的包按链路层、网络层、传输层分成三层逐层解析。事实上我们完全可以自定义、自编写一个协议驱动,按照我们自己的协议来发包、收包(我们的这个自定义协议驱动可以采用分层机制,也可以采用简单的单层机制),这样在发送方电脑和接收方电脑都安装我们的自定义协议驱动后,发送方就可以按照自定义协议发包,接收方就按照约定的格式解包。

如果不考虑中间驱动,协议驱动是直接与小端口驱动交互的。协议驱动从小端口驱动收包,协议驱动发包给小端口驱动,这就是二者之间的交互。它们之间的交互通过ndis框架约定的一套回调函数接口来实现。

下面我们看各层驱动的实现:

一个协议驱动需要在DriverEntry中将自己注册为一个协议驱动,向ndis框架登记、声明自己的协议特征。

一个协议特征记录了协议的名称以及它提供的各个回调函数

4.0版本的ndis协议特征结构如下定义:

/*
 * NDIS 4.0 protocol characteristics: the table a protocol driver registers
 * with NDIS. It records the NDIS version, the protocol name and the
 * callback entry points the protocol provides.
 */
typedef struct _NDIS40_PROTOCOL_CHARACTERISTICS
{
  UCHAR                           MajorNdisVersion;
  UCHAR                           MinorNdisVersion;
  __MINGW_EXTENSION union {
    UINT                          Reserved;
    UINT                          Flags;
  };
  OPEN_ADAPTER_COMPLETE_HANDLER   OpenAdapterCompleteHandler;   /* binding completed */
  CLOSE_ADAPTER_COMPLETE_HANDLER  CloseAdapterCompleteHandler;  /* unbinding completed */
  SEND_COMPLETE_HANDLER           SendCompleteHandler;          /* send completed */
  TRANSFER_DATA_COMPLETE_HANDLER  TransferDataCompleteHandler;  /* transfer-data completed */
  RESET_COMPLETE_HANDLER          ResetCompleteHandler;
  REQUEST_COMPLETE_HANDLER        RequestCompleteHandler;       /* NDIS request completed */
  RECEIVE_HANDLER                 ReceiveHandler;               /* receive routine */
  RECEIVE_COMPLETE_HANDLER        ReceiveCompleteHandler;       /* receive completed */
  STATUS_HANDLER                  StatusHandler;                /* status-change notification */
  STATUS_COMPLETE_HANDLER         StatusCompleteHandler;        /* status-change completed */
  NDIS_STRING                     Name;                         /* protocol name */
  RECEIVE_PACKET_HANDLER          ReceivePacketHandler;         /* receive-packet routine */
  BIND_HANDLER                    BindAdapterHandler;           /* bind notification */
  UNBIND_HANDLER                  UnbindAdapterHandler;         /* unbind notification */
  PNP_EVENT_HANDLER               PnPEventHandler;              /* PnP event callback */
  UNLOAD_PROTOCOL_HANDLER         UnloadHandler;                /* unload routine of the protocol driver */
} NDIS40_PROTOCOL_CHARACTERISTICS;

下面的函数用于将一个驱动注册为ndis协议驱动

/*
 * Registers the calling driver as an NDIS protocol driver.
 *
 * Status                  - receives the result of the registration
 * NdisProtocolHandle      - receives the protocol handle, which is really a
 *                           pointer to the PROTOCOL_BINDING descriptor;
 *                           NULL if validation or allocation fails
 * ProtocolCharacteristics - version, name and callbacks of the protocol
 * CharacteristicsLength   - size in bytes of *ProtocolCharacteristics
 */
VOID
NdisRegisterProtocol(
    OUT PNDIS_STATUS                    Status,
    OUT PNDIS_HANDLE                    NdisProtocolHandle,
    IN  PNDIS_PROTOCOL_CHARACTERISTICS  ProtocolCharacteristics,
    IN  UINT                            CharacteristicsLength)
{
  PPROTOCOL_BINDING Protocol;
  PNET_PNP_EVENT PnPEvent;
  UINT MinSize;

  *NdisProtocolHandle = NULL;

  /* The declared major version selects which structure layout is expected. */
  switch (ProtocolCharacteristics->MajorNdisVersion)
    {
    case 0x03:
      MinSize = sizeof(NDIS30_PROTOCOL_CHARACTERISTICS);
      break;

    case 0x04:
      MinSize = sizeof(NDIS40_PROTOCOL_CHARACTERISTICS);
      break;

    case 0x05:
      MinSize = sizeof(NDIS50_PROTOCOL_CHARACTERISTICS);
      break;

    default:
      *Status = NDIS_STATUS_BAD_VERSION;
      return;
    }

  /* The structure must be at least as large as the declared version needs. */
  if (CharacteristicsLength < MinSize)
    {
      *Status = NDIS_STATUS_BAD_CHARACTERISTICS;
      return;
    }

  /* The protocol handle is a pointer to this descriptor. */
  Protocol = ExAllocatePool(NonPagedPool, sizeof(PROTOCOL_BINDING));
  if (Protocol == NULL)
    {
      /* Previously the NULL pointer was zeroed and written through. */
      *Status = NDIS_STATUS_RESOURCES;
      return;
    }

  RtlZeroMemory(Protocol, sizeof(PROTOCOL_BINDING));

  /* Key step: record the protocol characteristics in the descriptor. */
  RtlCopyMemory(&Protocol->Chars, ProtocolCharacteristics, MinSize);

  KeInitializeSpinLock(&Protocol->Lock);

  /* The list of adapters bound to this protocol starts out empty. */
  InitializeListHead(&Protocol->AdapterListHead);

  *NdisProtocolHandle = Protocol;

  /* Key step: bind to all existing adapters right at registration time. */
  ndisBindMiniportsToProtocol(Status, Protocol);

  /* Build a "all binds complete" PnP event and hand it to the protocol. */
  PnPEvent = ProSetupPnPEvent(NetEventBindsComplete, NULL, 0);
  if (PnPEvent && Protocol->Chars.PnPEventHandler)
    {
      /* The handler's return value was never consumed by the original. */
      (VOID)(*Protocol->Chars.PnPEventHandler)(NULL, PnPEvent);
    }
  /* NOTE(review): PnPEvent is not released here; confirm who owns it. */

  if (*Status == NDIS_STATUS_SUCCESS)
    {
      /* Link the descriptor into the global protocol driver list. */
      ExInterlockedInsertTailList(&ProtocolListHead, &Protocol->ListEntry, &ProtocolListLock);
    }
  /* NOTE(review): on failure the descriptor stays allocated and the handle
   * stays set, as in the original; confirm whether it should be released. */
}

上面最主要的工作便是登记协议特征到驱动描述符中,然后顺带绑定所有现有的网卡。下面的函数就是用来绑定所有现有网卡的。

/*
 * Binds a protocol driver to the adapters listed for it in the registry.
 *
 * Reads the multi-string "Bind" value under <SERVICES_KEY><protocol name>
 * <LINKAGE_KEY> and, for each listed device that has already started and is
 * not yet bound to this protocol, calls the protocol's BindAdapterHandler.
 *
 * Status   - receives NDIS_STATUS_FAILURE / NDIS_STATUS_RESOURCES if the
 *            bind list cannot be read; otherwise NDIS_STATUS_SUCCESS or
 *            whatever the last invoked bind handler stored in it
 * Protocol - descriptor of the protocol driver to bind
 */
VOID  ndisBindMiniportsToProtocol(OUT PNDIS_STATUS Status,
                                  IN PPROTOCOL_BINDING Protocol)
{
    OBJECT_ATTRIBUTES ObjectAttributes;
    UNICODE_STRING RegistryPath;
    UNICODE_STRING ValueName;
    WCHAR *RegistryPathStr;
    WCHAR *DataPtr;
    NTSTATUS NtStatus;
    ULONG ResultLength = 0;
    HANDLE DriverKeyHandle = NULL;
    PKEY_VALUE_PARTIAL_INFORMATION KeyInformation = NULL;
    PNDIS_PROTOCOL_CHARACTERISTICS ProtocolCharacteristics = &Protocol->Chars;

    /* Build "<SERVICES_KEY><protocol name><LINKAGE_KEY>". */
    RegistryPathStr = ExAllocatePoolWithTag(PagedPool,
        sizeof(SERVICES_KEY) + ProtocolCharacteristics->Name.Length + sizeof(LINKAGE_KEY),
        NDIS_TAG + __LINE__);
    if (RegistryPathStr == NULL)
    {
        *Status = NDIS_STATUS_RESOURCES;
        return;
    }

    wcscpy(RegistryPathStr, SERVICES_KEY);
    wcsncat(RegistryPathStr, (WCHAR *)ProtocolCharacteristics->Name.Buffer,
            ProtocolCharacteristics->Name.Length / sizeof(WCHAR));
    /* Terminate with a wide NUL; the original stored the pointer constant NULL. */
    RegistryPathStr[wcslen(SERVICES_KEY) + ProtocolCharacteristics->Name.Length / sizeof(WCHAR)] = 0;
    wcscat(RegistryPathStr, LINKAGE_KEY);

    RtlInitUnicodeString(&RegistryPath, RegistryPathStr);
    InitializeObjectAttributes(&ObjectAttributes, &RegistryPath, OBJ_CASE_INSENSITIVE, NULL, NULL);

    /* Open the Linkage key; the path string is not needed afterwards. */
    NtStatus = ZwOpenKey(&DriverKeyHandle, KEY_READ, &ObjectAttributes);
    ExFreePool(RegistryPathStr);
    if (!NT_SUCCESS(NtStatus))
    {
        *Status = NDIS_STATUS_FAILURE;
        return;
    }

    /* First query with no buffer, only to learn the required length. */
    RtlInitUnicodeString(&ValueName, L"Bind");
    NtStatus = ZwQueryValueKey(DriverKeyHandle, &ValueName, KeyValuePartialInformation,
                               NULL, 0, &ResultLength);
    if (NtStatus != STATUS_BUFFER_OVERFLOW &&
        NtStatus != STATUS_BUFFER_TOO_SMALL &&
        NtStatus != STATUS_SUCCESS)
    {
        ZwClose(DriverKeyHandle);
        *Status = NDIS_STATUS_FAILURE;
        return;
    }

    KeyInformation = ExAllocatePoolWithTag(PagedPool,
        sizeof(KEY_VALUE_PARTIAL_INFORMATION) + ResultLength, NDIS_TAG + __LINE__);
    if (KeyInformation == NULL)
    {
        ZwClose(DriverKeyHandle);
        *Status = NDIS_STATUS_RESOURCES;
        return;
    }

    /* Read the Bind value: several adapter device names in one multi-string. */
    NtStatus = ZwQueryValueKey(DriverKeyHandle, &ValueName, KeyValuePartialInformation,
                               KeyInformation,
                               sizeof(KEY_VALUE_PARTIAL_INFORMATION) + ResultLength,
                               &ResultLength);
    ZwClose(DriverKeyHandle);
    if (!NT_SUCCESS(NtStatus))
    {
        ExFreePool(KeyInformation);
        *Status = NDIS_STATUS_FAILURE;
        return;
    }

    *Status = NDIS_STATUS_SUCCESS;

    /* Walk every adapter name in the multi-string. */
    for (DataPtr = (WCHAR *)KeyInformation->Data;
         *DataPtr != 0;
         DataPtr += wcslen(DataPtr) + 1)
    {
        VOID *BindContext = NULL;
        NDIS_STRING DeviceName;
        NDIS_STRING AdapterPath;
        WCHAR *AdapterPathStr;
        ULONG PathLength;
        BIND_HANDLER BindHandler;

        /* DeviceName has the form "\Device\<miniport device object name>". */
        RtlInitUnicodeString(&DeviceName, DataPtr);

        /* Skip adapters that have not started yet. */
        if (!MiniLocateDevice(&DeviceName))
            continue;

        /* Skip adapters this protocol is already bound to. */
        if (LocateAdapterBindingByName(Protocol, &DeviceName))
            continue;

        /*
         * Build "<SERVICES_KEY><device name><PARAMETERS_KEY><protocol name>".
         * DataPtr + 8 skips the 8-character "\Device\" prefix.
         */
        PathLength = sizeof(SERVICES_KEY) +
                     wcslen(DataPtr + 8) * sizeof(WCHAR) +
                     sizeof(PARAMETERS_KEY) +
                     ProtocolCharacteristics->Name.Length + sizeof(WCHAR);

        AdapterPathStr = ExAllocatePool(PagedPool, PathLength);
        if (AdapterPathStr == NULL)
        {
            ExFreePool(KeyInformation);
            *Status = NDIS_STATUS_RESOURCES;
            return;
        }

        wcscpy(AdapterPathStr, SERVICES_KEY);
        wcscat(AdapterPathStr, DataPtr + 8);
        wcscat(AdapterPathStr, PARAMETERS_KEY);
        wcsncat(AdapterPathStr, ProtocolCharacteristics->Name.Buffer,
                ProtocolCharacteristics->Name.Length / sizeof(WCHAR));
        AdapterPathStr[PathLength / sizeof(WCHAR) - 1] = 0;

        RtlInitUnicodeString(&AdapterPath, AdapterPathStr);

        /* Key step: notify the protocol driver to bind this adapter. */
        BindHandler = ProtocolCharacteristics->BindAdapterHandler;
        if (BindHandler)
        {
            /* NOTE(review): the path buffer is not freed after the call, as
             * in the original; confirm whether the handler may keep it. */
            BindHandler(Status, BindContext, &DeviceName, &AdapterPath, 0);
        }
        else
        {
            /* Nobody received the path, so it can be released right away. */
            ExFreePool(AdapterPathStr);
        }
    }

    ExFreePool(KeyInformation);
}

 

一个驱动注册为协议驱动后,ndis内部会为这个驱动创建一个协议驱动描述符,返回的句柄就是指向这个结构的指针。由 typedef PVOID NDIS_HANDLE; 可见,ndis句柄其实就是一个指针。

typedef struct _PROTOCOL_BINDING { 
//协议驱动描述符

    LIST_ENTRY                    ListEntry;        用来挂入全局协议驱动链表

    KSPIN_LOCK                    Lock;            

    NDIS_PROTOCOL_CHARACTERISTICS
Chars;            //关键。本协议驱动的特征

    WORK_QUEUE_ITEM               WorkItem;
        

    LIST_ENTRY                    AdapterListHead;  //本协议驱动绑定的所有网卡

}
PROTOCOL_BINDING, *PPROTOCOL_BINDING;

 

 

 

同样:小端口驱动也需要在其DriverEntry中将自己注册为一个ndis小端口驱动。

Struct NDIS40_MINIPORT_CHARACTERISTICS  //4.0版的小端口驱动特征结构

{

  UCHAR  MajorNdisVersion;

  UCHAR  MinorNdisVersion;

  UINT  Reserved;

  W_CHECK_FOR_HANG_HANDLER  CheckForHangHandler;

  W_DISABLE_INTERRUPT_HANDLER  DisableInterruptHandler;//禁用来自特定网卡的中断

  W_ENABLE_INTERRUPT_HANDLER  EnableInterruptHandler;
//启用来自特定网卡的中断

  W_HALT_HANDLER  HaltHandler;

  W_HANDLE_INTERRUPT_HANDLER  HandleInterruptHandler;//isr的后半部

  W_INITIALIZE_HANDLER  InitializeHandler;
//IRP_MN_START_DEVICE中调用的启动初始化函数

  W_ISR_HANDLER  ISRHandler; //我们的isr

  W_QUERY_INFORMATION_HANDLER  QueryInformationHandler;//处理查询请求的函数

  W_RECONFIGURE_HANDLER  ReconfigureHandler;

  W_RESET_HANDLER  ResetHandler;

  W_SEND_HANDLER  SendHandler;
//发送函数

  W_SET_INFORMATION_HANDLER  SetInformationHandler;//处理设置请求的函数

  W_TRANSFER_DATA_HANDLER  TransferDataHandler;//处理协议驱动发下来的转移数据请求的函数

  W_RETURN_PACKET_HANDLER  ReturnPacketHandler;
//归还包函数

  W_SEND_PACKETS_HANDLER  SendPacketsHandler;//发送包函数

  W_ALLOCATE_COMPLETE_HANDLER  AllocateCompleteHandler;

}

 

typedef struct _NDIS_M_DRIVER_BLOCK    
//小端口驱动描述符、句柄

{

    LIST_ENTRY                      ListEntry;                //用来挂入全局小端口驱动链表

    KSPIN_LOCK                      Lock;                    

    NDIS_MINIPORT_CHARACTERISTICS   MiniportCharacteristics;  //特征

    WORK_QUEUE_ITEM                 WorkItem;                

    PDRIVER_OBJECT                  DriverObject;             //小端口驱动对象

    LIST_ENTRY                      DeviceList;               //本驱动中创建的所有适配器设备

    PUNICODE_STRING                 RegistryPath;             //本驱动的服务键路径

}
NDIS_M_DRIVER_BLOCK, *PNDIS_M_DRIVER_BLOCK;

 

下面的函数用于将一个驱动注册为ndis小端口驱动

/*
 * Registers the calling driver as an NDIS miniport driver.
 *
 * NdisWrapperHandle       - miniport driver handle, i.e. a pointer to the
 *                           NDIS_M_DRIVER_BLOCK descriptor
 * MiniportCharacteristics - version and callbacks of the miniport
 * CharacteristicsLength   - size in bytes of *MiniportCharacteristics
 *
 * Returns NDIS_STATUS_SUCCESS, NDIS_STATUS_BAD_VERSION,
 * NDIS_STATUS_BAD_CHARACTERISTICS, or NDIS_STATUS_RESOURCES when the driver
 * object extension cannot be allocated.
 */
NDIS_STATUS
NdisMRegisterMiniport(
    IN  NDIS_HANDLE                     NdisWrapperHandle,
    IN  PNDIS_MINIPORT_CHARACTERISTICS  MiniportCharacteristics,
    IN  UINT                            CharacteristicsLength)
{
  UINT MinSize;
  PNDIS_M_DRIVER_BLOCK Miniport = (PNDIS_M_DRIVER_BLOCK)NdisWrapperHandle;
  PNDIS_M_DRIVER_BLOCK *MiniportPtr;
  NTSTATUS Status;

  /* The declared major version selects which structure layout is expected. */
  switch (MiniportCharacteristics->MajorNdisVersion)
    {
      case 0x03:
        MinSize = sizeof(NDIS30_MINIPORT_CHARACTERISTICS);
        break;

      case 0x04:
        MinSize = sizeof(NDIS40_MINIPORT_CHARACTERISTICS);
        break;

      case 0x05:
        MinSize = sizeof(NDIS50_MINIPORT_CHARACTERISTICS);
        break;

      default:
        return NDIS_STATUS_BAD_VERSION;
    }

  if (CharacteristicsLength < MinSize)
    return NDIS_STATUS_BAD_CHARACTERISTICS;

  /* These three callbacks are mandatory in every NDIS version. */
  if ((!MiniportCharacteristics->HaltHandler) ||
      (!MiniportCharacteristics->InitializeHandler) ||
      (!MiniportCharacteristics->ResetHandler))
    {
      return NDIS_STATUS_BAD_CHARACTERISTICS;
    }

  /* Query/set handlers: required before 5.0; from 5.0 on a CoRequestHandler
   * is accepted in their place. */
  if (MiniportCharacteristics->MajorNdisVersion < 0x05)
    {
      if ((!MiniportCharacteristics->QueryInformationHandler) ||
          (!MiniportCharacteristics->SetInformationHandler))
        {
          return NDIS_STATUS_BAD_CHARACTERISTICS;
        }
    }
  else
    {
      if (((!MiniportCharacteristics->QueryInformationHandler) ||
           (!MiniportCharacteristics->SetInformationHandler)) &&
          (!MiniportCharacteristics->CoRequestHandler))
        {
          return NDIS_STATUS_BAD_CHARACTERISTICS;
        }
    }

  /* At least one send entry point valid for the declared version is needed. */
  if (MiniportCharacteristics->MajorNdisVersion == 0x03)
    {
      if (!MiniportCharacteristics->SendHandler)
        return NDIS_STATUS_BAD_CHARACTERISTICS;
    }
  else if (MiniportCharacteristics->MajorNdisVersion == 0x04)
    {
      if ((!MiniportCharacteristics->SendHandler) &&
          (!MiniportCharacteristics->SendPacketsHandler))
        {
          return NDIS_STATUS_BAD_CHARACTERISTICS;
        }
    }
  else if (MiniportCharacteristics->MajorNdisVersion == 0x05)
    {
      if ((!MiniportCharacteristics->SendHandler) &&
          (!MiniportCharacteristics->SendPacketsHandler) &&
          (!MiniportCharacteristics->CoSendPacketsHandler))
        {
          return NDIS_STATUS_BAD_CHARACTERISTICS;
        }
    }

  /* Key step: record the miniport characteristics in the driver descriptor. */
  RtlCopyMemory(&Miniport->MiniportCharacteristics, MiniportCharacteristics, MinSize);

  /* The driver object extension holds a pointer back to the descriptor. */
  Status = IoAllocateDriverObjectExtension(Miniport->DriverObject, (PVOID)'NMID',
                                           sizeof(PNDIS_M_DRIVER_BLOCK),
                                           (PVOID *)&MiniportPtr);
  if (!NT_SUCCESS(Status))
    {
      /* MiniportPtr is not valid here; the original wrote through it anyway. */
      return NDIS_STATUS_RESOURCES;
    }

  *MiniportPtr = Miniport;

  /*
   * NDIS takes over these IRP dispatch routines. Routines the driver set
   * before registering are overwritten; setting them after registration
   * hooks the ones NDIS installed. ("I" marks internal, non-exported
   * functions.)
   */
  Miniport->DriverObject->MajorFunction[IRP_MJ_CREATE] = NdisICreateClose;
  Miniport->DriverObject->MajorFunction[IRP_MJ_CLOSE] = NdisICreateClose;
  Miniport->DriverObject->MajorFunction[IRP_MJ_PNP] = NdisIDispatchPnp;
  Miniport->DriverObject->MajorFunction[IRP_MJ_SHUTDOWN] = NdisIShutdown;
  Miniport->DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = NdisIDeviceIoControl;

  /* Key step: NDIS also takes over AddDevice, which creates the miniport
   * device object and attaches it above the hardware PDO. */
  Miniport->DriverObject->DriverExtension->AddDevice = NdisIAddDevice;

  return NDIS_STATUS_SUCCESS;
}

 

协议驱动通过NdisRegisterProtocol、小端口驱动通过NdisMRegisterMiniport向ndis框架注册了自己的回调函数后,协议驱动就可以与小端口驱动在ndis框架下通过这两组回调函数进行交互通信了。Ndis.sys起着桥梁中介的作用,除此之外,ndis.sys模块还提供了大量的ndis运行库函数。因此又可以说ndis.sys是一个函数库。

 

 

调用NdisMRegisterMiniport之前,需要一个ndis小端口驱动句柄(实际上是一个指向驱动描述符结构的指针),下面的宏就是用来创建一个小端口驱动句柄的。

/* NdisMInitializeWrapper is an alias for NdisInitializeWrapper. */
#define NdisMInitializeWrapper NdisInitializeWrapper

 

/*
 * Creates a miniport driver descriptor and returns it as the driver handle.
 *
 * NdisWrapperHandle - receives the new handle; left NULL if any allocation
 *                     fails
 * SystemSpecific1   - must be the driver's DriverObject
 * SystemSpecific2   - must be the driver's RegistryPath (PUNICODE_STRING)
 * SystemSpecific3   - unused
 */
VOID
NdisInitializeWrapper(
    OUT PNDIS_HANDLE    NdisWrapperHandle,
    IN  PVOID           SystemSpecific1,
    IN  PVOID           SystemSpecific2,
    IN  PVOID           SystemSpecific3)
{
  PNDIS_M_DRIVER_BLOCK Miniport;
  PUNICODE_STRING RegistryPath;
  WCHAR *RegistryBuffer;

  *NdisWrapperHandle = NULL;

  /* Create the miniport driver descriptor, which is also the handle. */
  Miniport = ExAllocatePool(NonPagedPool, sizeof(NDIS_M_DRIVER_BLOCK));
  if (Miniport == NULL)
    return;

  RtlZeroMemory(Miniport, sizeof(NDIS_M_DRIVER_BLOCK));
  KeInitializeSpinLock(&Miniport->Lock);
  Miniport->DriverObject = (PDRIVER_OBJECT)SystemSpecific1;

  /* Keep a private, NUL-terminated copy of the service key path. */
  RegistryPath = ExAllocatePool(PagedPool, sizeof(UNICODE_STRING));
  if (RegistryPath == NULL)
    {
      ExFreePool(Miniport);
      return;
    }

  RegistryPath->Length = ((PUNICODE_STRING)SystemSpecific2)->Length;
  RegistryPath->MaximumLength = RegistryPath->Length + sizeof(WCHAR);

  RegistryBuffer = ExAllocatePool(PagedPool, RegistryPath->MaximumLength);
  if (RegistryBuffer == NULL)
    {
      ExFreePool(RegistryPath);
      ExFreePool(Miniport);
      return;
    }

  RtlCopyMemory(RegistryBuffer, ((PUNICODE_STRING)SystemSpecific2)->Buffer,
                RegistryPath->Length);
  RegistryBuffer[RegistryPath->Length / sizeof(WCHAR)] = 0;
  RegistryPath->Buffer = RegistryBuffer;

  /* Record the service key path of this miniport driver. */
  Miniport->RegistryPath = RegistryPath;

  /* The driver's adapter device list starts out empty. */
  InitializeListHead(&Miniport->DeviceList);

  /* Link the driver into the global miniport driver list. */
  ExInterlockedInsertTailList(&MiniportListHead, &Miniport->ListEntry, &MiniportListLock);

  *NdisWrapperHandle = Miniport;
}

这样,在小端口驱动的描述符中有一个指针指向其驱动对象,而在驱动对象的标准扩展部中也有一个指针指向了小端口驱动的描述符。二者互相指向。

前面说过:ndis内部设置的AddDevice(即NdisIAddDevice函数)会在内部自动创建一个小端口设备对象,然后加入设备栈。我们看:

/*
 * AddDevice routine that NDIS installs for every miniport driver (internal,
 * not exported). It creates the miniport device object for a NIC and
 * attaches it to the device stack above the hardware PDO.
 *
 * DriverObject         - driver object of the NDIS miniport driver
 * PhysicalDeviceObject - hardware PDO that represents the NIC
 *
 * Returns STATUS_SUCCESS, or the failing status of the step that failed.
 */
NTSTATUS
NdisIAddDevice(
    IN PDRIVER_OBJECT DriverObject,
    IN PDEVICE_OBJECT PhysicalDeviceObject)
{
  static const WCHAR ClassKeyName[] = {'C','l','a','s','s','\\'};
  static const WCHAR LinkageKeyName[] = {'\\','L','i','n','k','a','g','e',0};
  PNDIS_M_DRIVER_BLOCK Miniport;
  PNDIS_M_DRIVER_BLOCK *MiniportPtr;
  WCHAR *LinkageKeyBuffer;
  ULONG DriverKeyLength;
  RTL_QUERY_REGISTRY_TABLE QueryTable[2];
  UNICODE_STRING ExportName;
  PDEVICE_OBJECT DeviceObject;
  PLOGICAL_ADAPTER Adapter;
  NTSTATUS Status;

  /* The driver object extension points at the miniport driver descriptor. */
  MiniportPtr = IoGetDriverObjectExtension(DriverObject, (PVOID)'NMID');
  if (MiniportPtr == NULL)
    return STATUS_UNSUCCESSFUL;

  Miniport = *MiniportPtr;

  /* Ask for the length of the PDO's driver key name first. */
  Status = IoGetDeviceProperty(PhysicalDeviceObject, DevicePropertyDriverKeyName,
                               0, NULL, &DriverKeyLength);
  if (Status != STATUS_BUFFER_TOO_SMALL &&
      Status != STATUS_BUFFER_OVERFLOW &&
      Status != STATUS_SUCCESS)
    {
      return Status;
    }

  LinkageKeyBuffer = ExAllocatePool(PagedPool, DriverKeyLength +
                                    sizeof(ClassKeyName) + sizeof(LinkageKeyName));
  if (LinkageKeyBuffer == NULL)
    return STATUS_INSUFFICIENT_RESOURCES;

  /* Fetch the driver key name into the middle of the buffer. */
  Status = IoGetDeviceProperty(PhysicalDeviceObject, DevicePropertyDriverKeyName,
                               DriverKeyLength,
                               LinkageKeyBuffer + (sizeof(ClassKeyName) / sizeof(WCHAR)),
                               &DriverKeyLength);
  if (!NT_SUCCESS(Status))
    {
      ExFreePool(LinkageKeyBuffer);
      return Status;
    }

  /* Compose "Class\<DriverKeyName>\Linkage"; the "- 1" overwrites the
   * driver key name's terminating NUL with the Linkage suffix. */
  RtlCopyMemory(LinkageKeyBuffer, ClassKeyName, sizeof(ClassKeyName));
  RtlCopyMemory(LinkageKeyBuffer + ((sizeof(ClassKeyName) + DriverKeyLength) /
                sizeof(WCHAR)) - 1, LinkageKeyName, sizeof(LinkageKeyName));

  /* Query the "Export" value; it becomes the miniport device object name. */
  RtlZeroMemory(QueryTable, sizeof(QueryTable));
  RtlInitUnicodeString(&ExportName, NULL);
  QueryTable[0].Flags = RTL_QUERY_REGISTRY_REQUIRED | RTL_QUERY_REGISTRY_DIRECT;
  QueryTable[0].Name = L"Export";
  QueryTable[0].EntryContext = &ExportName;

  Status = RtlQueryRegistryValues(RTL_REGISTRY_CONTROL, LinkageKeyBuffer,
                                  QueryTable, NULL, NULL);
  ExFreePool(LinkageKeyBuffer);   /* the original never released this buffer */
  if (!NT_SUCCESS(Status))
    return Status;

  /* Key step: NDIS creates the miniport device on the driver's behalf, with
   * the standard LOGICAL_ADAPTER structure as its device extension. */
  Status = IoCreateDevice(Miniport->DriverObject, sizeof(LOGICAL_ADAPTER),
                          &ExportName, FILE_DEVICE_PHYSICAL_NETCARD, 0, FALSE,
                          &DeviceObject);
  if (!NT_SUCCESS(Status))
    {
      RtlFreeUnicodeString(&ExportName);
      return Status;
    }

  Adapter = (PLOGICAL_ADAPTER)DeviceObject->DeviceExtension;
  KeInitializeSpinLock(&Adapter->NdisMiniportBlock.Lock);
  InitializeListHead(&Adapter->ProtocolListHead);   /* no protocols bound yet */

  Status = IoRegisterDeviceInterface(PhysicalDeviceObject, &GUID_DEVINTERFACE_NET,
                                     NULL, &Adapter->NdisMiniportBlock.SymbolicLinkName);
  if (!NT_SUCCESS(Status))
    {
      IoDeleteDevice(DeviceObject);
      RtlFreeUnicodeString(&ExportName);
      return Status;
    }

  Adapter->NdisMiniportBlock.DriverHandle = Miniport;
  Adapter->NdisMiniportBlock.MiniportName = ExportName;          /* miniport device object name */
  Adapter->NdisMiniportBlock.DeviceObject = DeviceObject;
  Adapter->NdisMiniportBlock.PhysicalDeviceObject = PhysicalDeviceObject;   /* the NIC's hardware PDO */

  /* Key step: attach the new device to the stack above the PDO. */
  Adapter->NdisMiniportBlock.NextDeviceObject =
      IoAttachDeviceToDeviceStack(Adapter->NdisMiniportBlock.DeviceObject,
                                  PhysicalDeviceObject);

  Adapter->NdisMiniportBlock.OldPnPDeviceState = 0;
  Adapter->NdisMiniportBlock.PnPDeviceState = NdisPnPDeviceAdded;   /* device created and attached */

  KeInitializeTimer(&Adapter->NdisMiniportBlock.WakeUpDpcTimer.Timer);
  KeInitializeDpc(&Adapter->NdisMiniportBlock.WakeUpDpcTimer.Dpc,
                  MiniportHangDpc, Adapter);

  DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING;

  return STATUS_SUCCESS;
}

总之,Ndis内部托管的AddDevice会为我们自动创建小端口设备,并加入设备栈。但是协议驱动就没有这样的AddDevice了,因此协议驱动并没有以设备对象的形式挂入设备栈中。协议驱动与小端口驱动之间断层了,irp最终只能下发到协议驱动这一层,就再也传不下去了。协议驱动与小端口驱动之间的交互就不能使用传统的irp方式,而只能借助ndis框架和回调函数进行通信。

Ndis内部的小端口设备对象的设备扩展结构

typedef struct _LOGICAL_ADAPTER  //标准的小端口设备扩展

{

    //这个字段内部有一个自定义小端口设备扩展,用户设置的自定义设备扩展就放在那里

    NDIS_MINIPORT_BLOCK         NdisMiniportBlock;     

    PNDIS_MINIPORT_WORK_ITEM    WorkQueueHead;          /* Head of
work queue */

    PNDIS_MINIPORT_WORK_ITEM    WorkQueueTail;          /* Tail of
work queue */

    LIST_ENTRY                  ListEntry;              //用来挂入全局的小端口设备链表

    LIST_ENTRY                  MiniportListEntry;      //用来挂入本驱动中的小端口设备链表

    LIST_ENTRY                  ProtocolListHead;       //绑定着本小端口设备的所有协议驱动

    ULONG                       MediumHeaderSize;       //链路层头部长度(即链路层类型)

    HARDWARE_ADDRESS            Address;                //物理地址(以太网卡为MAC)

    ULONG                       AddressLength;          //物理地址长度(以太网卡为6B

    PMINIPORT_BUGCHECK_CONTEXT  BugcheckContext;       

}
LOGICAL_ADAPTER, *PLOGICAL_ADAPTER;

 

 

当小端口驱动加载并执行了DriverEntry和AddDevice后,系统就会发出一个IRP_MN_START_DEVICE的pnp irp来启动设备。Pnp irp的派遣函数也被ndis托管了,固定为NdisIDispatchPnp。我们看它是如何处理pnp irp的:

/*
 * PnP dispatch routine that NDIS installs for miniport drivers.
 *
 * Only the IRP_MN_START_DEVICE case is shown; the remaining minor functions
 * are elided ("…") in this listing. For a start request the IRP is first
 * forwarded down the stack; if that succeeds, NdisIPnPStartDevice runs, and
 * the IRP is then completed with the resulting status.
 *
 * DeviceObject - miniport device object; its device extension is read as a
 *                LOGICAL_ADAPTER
 * Irp          - the PnP IRP being dispatched
 */
NTSTATUS

NdisIDispatchPnp(IN PDEVICE_OBJECT DeviceObject,PIRP Irp)

{

  PIO_STACK_LOCATION
Stack = IoGetCurrentIrpStackLocation(Irp);

  PLOGICAL_ADAPTER
Adapter = (PLOGICAL_ADAPTER)DeviceObject->DeviceExtension;

  NTSTATUS
Status;

  switch
(Stack->MinorFunction)

    {

      case
IRP_MN_START_DEVICE:

        Status
= NdisIForwardIrpAndWait(Adapter, Irp);//forward to the lower drivers and wait for completion

        if
(NT_SUCCESS(Status)
&& NT_SUCCESS(Irp->IoStatus.Status))

           Status =
NdisIPnPStartDevice(DeviceObject,
Irp);//perform the generic device start-up work

        Irp->IoStatus.Status = Status;

        IoCompleteRequest(Irp, IO_NO_INCREMENT);

        break;

       …

    }

  return
Status;

}

 

实际的设备启动工作由NdisIPnPStartDevice完成。这是一个通用函数,用来完成一些通用的ndis网卡设备的启动工作。

/*
 * Generic start-up work for an NDIS NIC device (abridged: the listing elides
 * code with "…" at the start and end, including all local declarations).
 *
 * The visible part links the adapter into the global adapter list, calls the
 * miniport's InitializeHandler, records the selected medium type, links the
 * adapter into its driver's device list, and then calls
 * ndisBindMiniportsToProtocol for every registered protocol driver.
 */
NTSTATUS  NdisIPnPStartDevice(IN PDEVICE_OBJECT DeviceObject,PIRP Irp)

{   …

    //add the adapter to the global adapter list

ExInterlockedInsertTailList(&AdapterListHead,
&Adapter->ListEntry,
&AdapterListLock);

//Key step: call the driver-supplied start-up initialisation routine so it can do its own initialisation

    NdisStatus = (*Adapter->NdisMiniportBlock.DriverHandle->MiniportCharacteristics.InitializeHandler)

(

    &OpenErrorStatus,
&SelectedMediumIndex, &MediaArray[0],

MEDIA_ARRAY_SIZE, Adapter,
(NDIS_HANDLE)&WrapperContext

);

Adapter->NdisMiniportBlock.MediaType = MediaArray[SelectedMediumIndex];//record the medium type

  //add the adapter to the owning miniport driver's adapter list

  ExInterlockedInsertTailList(&Adapter->NdisMiniportBlock.DriverHandle->DeviceList,
&Adapter->MiniportListEntry,
&Adapter->NdisMiniportBlock.DriverHandle->Lock);

 

  //Key step: once the new NIC is initialised, notify every protocol driver to bind

  CurrentEntry
= ProtocolListHead.Flink;

  while
(CurrentEntry != &ProtocolListHead)

  {

      ProtocolBinding
= CONTAINING_RECORD(CurrentEntry,
PROTOCOL_BINDING, ListEntry);

      ndisBindMiniportsToProtocol(&NdisStatus, ProtocolBinding);

      CurrentEntry
= CurrentEntry->Flink;

}

  …

}

 

小端口驱动都必须提供一个自定义的启动初始化回调函数,用来在网卡启动时执行某些初始化工作。下面是一个典型的启动初始化函数,我们看看一般要做哪些初始化工作。(注意下文所说的示例函数都来自于ne2000.sys这个通用的以太网卡小端口驱动)

//示例函数(这个示例回调函数做了初始化硬件、注册中断向量 等工作)

/*
 * Sample MiniportInitialize callback (from the ne2000 miniport driver).
 * It picks the 802.3 medium, allocates the driver's own adapter context,
 * determines the I/O port and IRQ, initialises the NIC hardware, chooses the
 * station (MAC) address and registers the interrupt.
 *
 * OpenErrorStatus             - not written by this routine
 * SelectedMediumIndex         - receives the index of NdisMedium802_3 in
 *                               MediumArray
 * MediumArray                 - media types offered by NDIS
 * MediumArraySize             - number of entries in MediumArray
 * MiniportAdapterHandle       - NDIS's handle for this adapter (the standard
 *                               miniport device extension)
 * WrapperConfigurationContext - used to query resources and open the
 *                               adapter's configuration key
 *
 * Returns NDIS_STATUS_SUCCESS, NDIS_STATUS_UNSUPPORTED_MEDIA, or the status
 * of the step that failed.
 */
NDIS_STATUS NTAPI MiniportInitialize(
    OUT PNDIS_STATUS    OpenErrorStatus,
    OUT PUINT           SelectedMediumIndex,
    IN  PNDIS_MEDIUM    MediumArray,
    IN  UINT            MediumArraySize,
    IN  NDIS_HANDLE     MiniportAdapterHandle,
    IN  NDIS_HANDLE     WrapperConfigurationContext)
{
    UINT i;
    NDIS_STATUS Status;
    PNIC_ADAPTER Adapter;
    NDIS_HANDLE ConfigurationHandle;
    /* The registry address is a byte buffer. Indexing it through a UINT
     * pointer, as the original did, reads past its 6 bytes. */
    UCHAR *RegNetworkAddress = NULL;
    UINT RegNetworkAddressLength = 0;

    /* Find the Ethernet (802.3) medium among those NDIS offers. */
    for (i = 0; i < MediumArraySize; i++) {
        if (MediumArray[i] == NdisMedium802_3)
            break;
    }

    if (i == MediumArraySize)
        return NDIS_STATUS_UNSUPPORTED_MEDIA;

    *SelectedMediumIndex = i;

    /* Allocate the driver's own (custom) adapter context. */
    Status = NdisAllocateMemory((PVOID *)&Adapter, sizeof(NIC_ADAPTER), 0, HighestAcceptableMax);
    if (Status != NDIS_STATUS_SUCCESS)
        return Status;   /* previously the unallocated pointer was zeroed */

    NdisZeroMemory(Adapter, sizeof(NIC_ADAPTER));
    Adapter->MiniportAdapterHandle  = MiniportAdapterHandle;

    /* Default resource configuration. */
    Adapter->IoBaseAddress          = DRIVER_DEFAULT_IO_BASE_ADDRESS;   /* port base address */
    Adapter->InterruptLevel         = DRIVER_DEFAULT_INTERRUPT_NUMBER;
    Adapter->InterruptVector        = DRIVER_DEFAULT_INTERRUPT_NUMBER;
    Adapter->InterruptShared        = DRIVER_DEFAULT_INTERRUPT_SHARED;
    Adapter->InterruptMode          = DRIVER_DEFAULT_INTERRUPT_MODE;
    Adapter->MaxMulticastListSize   = DRIVER_MAX_MULTICAST_LIST_SIZE;
    Adapter->InterruptMask          = DRIVER_INTERRUPT_MASK;
    Adapter->LookaheadSize          = DRIVER_MAXIMUM_LOOKAHEAD;         /* lookahead length */

    /* Query the resources (IRQ, port) the system assigned to this NIC. */
    MiQueryResources(&Status, Adapter, WrapperConfigurationContext);

    /* If that failed, fall back to the values configured in the registry. */
    if (Status != NDIS_STATUS_SUCCESS) {
        PNDIS_CONFIGURATION_PARAMETER ConfigurationParameter;
        UNICODE_STRING Keyword;

        NdisOpenConfiguration(&Status, &ConfigurationHandle, WrapperConfigurationContext);
        if (Status == NDIS_STATUS_SUCCESS) {
            NdisInitUnicodeString(&Keyword, L"Irq");
            NdisReadConfiguration(&Status, &ConfigurationParameter,
                                  ConfigurationHandle, &Keyword, NdisParameterHexInteger);
            if (Status == NDIS_STATUS_SUCCESS) {
                Adapter->InterruptLevel =
                Adapter->InterruptVector = ConfigurationParameter->ParameterData.IntegerData;
            }

            NdisInitUnicodeString(&Keyword, L"Port");
            NdisReadConfiguration(&Status, &ConfigurationParameter,
                                  ConfigurationHandle, &Keyword, NdisParameterHexInteger);
            if (Status == NDIS_STATUS_SUCCESS)
                Adapter->IoBaseAddress = ConfigurationParameter->ParameterData.IntegerData;

            NdisCloseConfiguration(ConfigurationHandle);
        }
    }

    /* Key step: store the custom adapter context inside the standard
     * device extension. */
    NdisMSetAttributes(MiniportAdapterHandle, (NDIS_HANDLE)Adapter,
                       FALSE, NdisInterfaceIsa);

    Status = NdisMRegisterIoPortRange(&Adapter->IOBase, MiniportAdapterHandle,
                                      Adapter->IoBaseAddress, 0x20);
    if (Status != NDIS_STATUS_SUCCESS) {
        /* TODO(review): the article elides this failure path; the cleanup of
         * what was acquired so far is not shown and must be restored. */
        return Status;
    }
    Adapter->IOPortRangeRegistered = TRUE;

#ifndef NOCARD
    /* Initialise the NIC's hardware registers. */
    Status = NICInitialize(Adapter);
    if (Status != NDIS_STATUS_SUCCESS) {
        /* TODO(review): failure path elided in the article, as above. */
        return Status;
    }

    NdisOpenConfiguration(&Status, &ConfigurationHandle, WrapperConfigurationContext);
    if (Status == NDIS_STATUS_SUCCESS) {
        /* Read the software-configured MAC address from the registry. */
        NdisReadNetworkAddress(&Status, (PVOID *)&RegNetworkAddress,
                               &RegNetworkAddressLength, ConfigurationHandle);
        if (Status == NDIS_STATUS_SUCCESS && RegNetworkAddressLength == 6) {
            for (i = 0; i < 6; i++)
                Adapter->StationAddress[i] = RegNetworkAddress[i];
        }

        NdisCloseConfiguration(ConfigurationHandle);
    }

    /* No usable registry address: use the NIC's permanent address. */
    if (Status != NDIS_STATUS_SUCCESS || RegNetworkAddressLength != 6) {
        for (i = 0; i < 6; i++)
            Adapter->StationAddress[i] = Adapter->PermanentAddress[i];
    }

    /* ... (further setup elided in the article) ... */

    NICSetup(Adapter);   /* program the NIC's hardware registers */
#endif

    /* Register the interrupt. */
    Status = NdisMRegisterInterrupt(&Adapter->Interrupt, MiniportAdapterHandle,
                                    Adapter->InterruptVector, Adapter->InterruptLevel, FALSE,
                                    Adapter->InterruptShared, Adapter->InterruptMode);
    if (Status != NDIS_STATUS_SUCCESS) {
        /* TODO(review): failure path elided in the article, as above. */
        return Status;
    }
    Adapter->InterruptRegistered = TRUE;

#ifndef NOCARD
    NICStart(Adapter);   /* program the NIC's hardware registers */
#endif

    NdisMRegisterAdapterShutdownHandler(MiniportAdapterHandle, Adapter, MiniportShutdown);
    Adapter->ShutdownHandlerRegistered = TRUE;

    InsertTailList(&DriverInfo.AdapterListHead, &Adapter->ListEntry);

    return NDIS_STATUS_SUCCESS;
}

 

如上,可以看出,一块网卡的启动初始化工作是比较复杂的。上面的示例函数分配了一个自定义的小端口设备对象扩展,初始化网卡内部的硬件寄存器,注册中断向量,最后返回网卡的介质类型告诉ndis框架(前3项工作是可选的,最后一项告知工作是必须的)。

事实上,一般的网卡在启动初始化时都要做这些工作:【硬件、注断、自扩展】

硬件:指初始化硬件

注断:注册中断isr

自扩展:在标准小端口设备扩展之外再另行分配一个自定义设备扩展

题外话:

为什么要分配一个自定义设备扩展呢? 我们知道,ndis内部提供托管的AddDevice会为我们自动创建一个小端口设备对象,而这个设备对象的设备扩展是ndis内部预置的一个结构。以往我们手动调用IoCreateDevice时都是自己定义的设备扩展来保存自定义信息,但现在被ndis托管了,如果我们希望仍旧保存一些自定义信息怎么办?Ndis框架不傻,那个预置的小端口设备扩展内部就提供了一个字段(即适配器上下文),用来存放用户自定义的设备扩展。用户只需分配一个设备扩展,然后调用NdisMSetAttributes设置一下即可。

到时候ndis调用我们的回调函数时,会传入这个自定义设备扩展的。

 

如上,我们说了,在网卡启动初始化阶段,一般需要注册一个中断向量,下面的函数就是干这个的。

/*
 * Registers an interrupt for a miniport adapter.
 *
 * Interrupt             - caller-supplied interrupt object; zeroed and
 *                         initialised here
 * MiniportAdapterHandle - handle of the adapter (a LOGICAL_ADAPTER pointer)
 * InterruptVector       - bus-relative interrupt vector
 * InterruptLevel        - bus-relative interrupt level
 * RequestIsr            - whether the miniport's ISRHandler should be called
 * SharedInterrupt       - whether the interrupt line may be shared
 * InterruptMode         - interrupt mode passed on to IoConnectInterrupt
 *
 * Returns NDIS_STATUS_SUCCESS if the interrupt was connected, otherwise
 * NDIS_STATUS_FAILURE.
 */
NDIS_STATUS
NdisMRegisterInterrupt(
    OUT PNDIS_MINIPORT_INTERRUPT    Interrupt,
    IN  NDIS_HANDLE                 MiniportAdapterHandle,
    IN  UINT                        InterruptVector,
    IN  UINT                        InterruptLevel,
    IN  BOOLEAN                     RequestIsr,
    IN  BOOLEAN                     SharedInterrupt,
    IN  NDIS_INTERRUPT_MODE         InterruptMode)
{
  PLOGICAL_ADAPTER LogicalAdapter = (PLOGICAL_ADAPTER)MiniportAdapterHandle;
  KAFFINITY ProcessorMask;
  KIRQL DeviceIrql;
  ULONG SystemVector;
  NTSTATUS ConnectStatus;

  RtlZeroMemory(Interrupt, sizeof(NDIS_MINIPORT_INTERRUPT));

  KeInitializeSpinLock(&Interrupt->DpcCountLock);

  /* HandleDeferredProcessing is the DPC queued from the ISR. */
  KeInitializeDpc(&Interrupt->InterruptDpc, HandleDeferredProcessing, LogicalAdapter);

  KeInitializeEvent(&Interrupt->DpcsCompletedEvent, NotificationEvent, FALSE);

  Interrupt->SharedInterrupt = SharedInterrupt;
  Interrupt->IsrRequested = RequestIsr;
  Interrupt->Miniport = &LogicalAdapter->NdisMiniportBlock;

  SystemVector = HalGetInterruptVector(LogicalAdapter->NdisMiniportBlock.BusType,
                                       LogicalAdapter->NdisMiniportBlock.BusNumber,
                                       InterruptLevel, InterruptVector,
                                       &DeviceIrql, &ProcessorMask);

  /* Key step: connect the interrupt. ServiceRoutine is the ISR that NDIS
   * itself provides. */
  ConnectStatus = IoConnectInterrupt(&Interrupt->InterruptObject, ServiceRoutine,
                                     Interrupt, &Interrupt->DpcCountLock,
                                     SystemVector, DeviceIrql, DeviceIrql,
                                     InterruptMode, SharedInterrupt,
                                     ProcessorMask, FALSE);

  if (!NT_SUCCESS(ConnectStatus))
    {
      return NDIS_STATUS_FAILURE;
    }

  LogicalAdapter->NdisMiniportBlock.Interrupt = Interrupt;
  LogicalAdapter->NdisMiniportBlock.RegisteredInterrupts++;

  return NDIS_STATUS_SUCCESS;
}

 

这样,一旦有中断发生,就会进入ServiceRoutine这个isr。这个isr是ndis内部自己提供的,我们看它做了什么:

/*
 * ISR that NDIS connects for every miniport interrupt.
 *
 * Interrupt      - kernel interrupt object (not used here)
 * ServiceContext - the NDIS_MINIPORT_INTERRUPT registered for this interrupt
 *
 * Returns TRUE if the interrupt was recognised as belonging to this NIC.
 */
BOOLEAN  ServiceRoutine(IN  PKINTERRUPT Interrupt,  IN  PVOID  ServiceContext)
{
  PNDIS_MINIPORT_INTERRUPT MiniportInterrupt = ServiceContext;
  PNDIS_MINIPORT_BLOCK Block = MiniportInterrupt->Miniport;
  BOOLEAN Claimed = FALSE;
  BOOLEAN WantDpc = FALSE;

  if (MiniportInterrupt->IsrRequested)
    {
      /* Call the ISR the miniport registered in its characteristics; it
       * reports whether the interrupt is ours and whether the bottom half
       * should run. */
      (*Block->DriverHandle->MiniportCharacteristics.ISRHandler)(
          &Claimed,
          &WantDpc,
          Block->MiniportAdapterContext);
    }
  else if (Block->DriverHandle->MiniportCharacteristics.DisableInterruptHandler)
    {
      /* No ISR requested: disable the NIC's interrupts and always defer. */
      (*Block->DriverHandle->MiniportCharacteristics.DisableInterruptHandler)(
          Block->MiniportAdapterContext);
      WantDpc = TRUE;
      Claimed = TRUE;
    }

  /* Queue HandleDeferredProcessing, the bottom half of the ISR. */
  if (WantDpc)
    {
      KeInsertQueueDpc(&MiniportInterrupt->InterruptDpc, NULL, NULL);
    }

  return Claimed;
}

 

//我们的isr。(这是一个示例函数)

/*
 * Sample miniport ISR: masks the NIC's interrupts and defers all real work
 * to the bottom half.
 *
 * InterruptRecognized          - set to TRUE
 * QueueMiniportHandleInterrupt - set to TRUE so that the bottom half is
 *                                queued
 * MiniportAdapterContext       - the driver's own adapter context
 */
VOID NTAPI MiniportISR(
    OUT PBOOLEAN    InterruptRecognized,
    OUT PBOOLEAN    QueueMiniportHandleInterrupt,
    IN  NDIS_HANDLE MiniportAdapterContext)
{
    PNIC_ADAPTER Nic = (PNIC_ADAPTER)MiniportAdapterContext;

    /* Mask further interrupts from this NIC only; this is not the same as
     * the cli instruction. */
    NICDisableInterrupts(Nic);

    *QueueMiniportHandleInterrupt = TRUE;
    *InterruptRecognized          = TRUE;
}

如上,我们编写的这个示例isr很简单,它仅仅暂时屏蔽来自这个网卡的后续中断,然后将QueueMiniportHandleInterrupt置为TRUE,表示将实质的中断处理工作纳入到DPC中去执行。由于DPC都是在开中断的条件下执行的,所以必须先屏蔽掉来自同一网卡的其它后续中断,防止嵌套。而这个DPC就是ndis内部自己提供的下面这个函数,我们看它做了什么工作。

/*
 * DPC that NDIS queues from its ISR; runs the bottom half of interrupt
 * handling for an adapter.
 *
 * Dpc             - the DPC object (unused)
 * DeferredContext - the adapter, as passed to KeInitializeDpc
 * SystemArgument1 - unused
 * SystemArgument2 - unused
 */
VOID  HandleDeferredProcessing(
    IN  PKDPC   Dpc,
    IN  PVOID   DeferredContext,
    IN  PVOID   SystemArgument1,
    IN  PVOID   SystemArgument2)
{
  PLOGICAL_ADAPTER Adapter = GET_LOGICAL_ADAPTER(DeferredContext);

  /* Key step: call the HandleInterruptHandler the miniport registered.
   * In the original listing this call was fused into the comment line and
   * therefore never compiled. */
  (*Adapter->NdisMiniportBlock.DriverHandle->MiniportCharacteristics.HandleInterruptHandler)(
      Adapter->NdisMiniportBlock.MiniportAdapterContext);

  /* Afterwards, re-enable the NIC's interrupts if the miniport supplied an
   * enable routine. */
  if (Adapter->NdisMiniportBlock.DriverHandle->MiniportCharacteristics.EnableInterruptHandler)
    {
      (*Adapter->NdisMiniportBlock.DriverHandle->MiniportCharacteristics.EnableInterruptHandler)(
          Adapter->NdisMiniportBlock.MiniportAdapterContext);
    }
}

如上,用户自己注册小端口时提供的HandleInterruptHandler例程其实就是我们isr的后半部。当ndis框架回调执行了isr的后半部后,所有中断处理工作都处理完毕了,然后就可以开启来自这个网卡的后续中断了,也即撤销屏蔽。

 

下面就是一个示例HandleInterruptHandler,来自于ne2000驱动,我们看看那个驱动的isr后半部工作到底做了什么。

/*
 * Sample HandleInterruptHandler (ne2000 driver): the bottom half of the ISR.
 * Reads the NIC's interrupt status register (PG0_ISR) and services the
 * pending interrupt causes one bit at a time, for at most INTERRUPT_LIMIT
 * rounds.
 *
 * MiniportAdapterContext - the driver's own adapter context (NIC_ADAPTER)
 */
VOID NTAPI MiniportHandleInterrupt(IN  NDIS_HANDLE MiniportAdapterContext)
{
    PNIC_ADAPTER Nic = (PNIC_ADAPTER)MiniportAdapterContext;
    UCHAR EnabledBits = Nic->InterruptMask;   /* usually 0xFF */
    UCHAR Latched;                            /* last value read from PG0_ISR */
    UCHAR Bit;                                /* bit position being examined */
    UINT Rounds = 0;

    /* Read the NIC's current interrupt status. */
    NdisRawReadPortUchar(Nic->IOBase + PG0_ISR, &Latched);
    Nic->InterruptStatus |= (Latched & EnabledBits);

    Bit = 0x01;

    while (Nic->InterruptStatus != 0x00 && Rounds++ < INTERRUPT_LIMIT) {
        if (Latched != 0x00) {
            /* Write the bits back to PG0_ISR (presumably acknowledges them
             * on the chip - confirm) and restart from the lowest bit. */
            NdisRawWritePortUchar(Nic->IOBase + PG0_ISR, Latched);
            Bit = 0x01;
        }

        /* Scan upwards, one bit at a time, for the next pending cause. */
        while (((Nic->InterruptStatus & Bit) == 0) && (Bit < EnabledBits)) {
            Bit = (Bit << 1);
        }

        switch (Nic->InterruptStatus & Bit) {
        case ISR_OVW:
            /* The chip's receive buffer overflowed. */
            Nic->BufferOverflow = TRUE;
            if (Nic->MiniportAdapterHandle) {
                HandleReceive(Nic);   /* drain all frames and indicate them upwards */
            }
            Nic->InterruptStatus &= ~ISR_OVW;
            break;

        case ISR_RXE:
            /* A frame was received with an error. */
            NICUpdateCounters(Nic);
            Nic->ReceiveError = TRUE;
            /* fallthrough */
        case ISR_PRX:
            /* The chip received an Ethernet frame. */
            if (Nic->MiniportAdapterHandle) {
                HandleReceive(Nic);   /* drain all frames and indicate them upwards */
            }
            Nic->InterruptStatus &= ~(ISR_PRX | ISR_RXE);
            break;

        case ISR_TXE:
            /* The chip failed to transmit a frame. */
            NICUpdateCounters(Nic);
            Nic->TransmitError = TRUE;
            /* fallthrough */
        case ISR_PTX:
            /* The chip's transmit buffer became empty. */
            HandleTransmit(Nic);
            Nic->InterruptStatus &= ~(ISR_PTX | ISR_TXE);
            break;

        case ISR_CNT:
            /* One of the chip's counters overflowed. */
            NICUpdateCounters(Nic);
            Nic->InterruptStatus &= ~ISR_CNT;
            break;

        default:
            Nic->InterruptStatus &= ~Bit;
            break;
        }

        Bit = (Bit << 1);

        /* The status may have changed meanwhile; read the latest value. */
        NdisRawReadPortUchar(Nic->IOBase + PG0_ISR, &Latched);
        Nic->InterruptStatus |= (Latched & EnabledBits);
    }
}

 

如上,网卡的物理状态发生上述变化时,都会触发一次中断,同时记录在状态寄存器对应的位。

我们的isr每得到一次中断时,都要扫描状态寄存器中的所有状态位,一一处理(因为我们在处理中断时,屏蔽了来自这个网卡的中断,因此会造成中断累积。所以必须在每次中断的处理函数中处理所有可能发生的状态)

网卡芯片中有一个硬件发送缓冲区和一个硬件接收缓冲区。当从网络电缆来到一帧时,就会存放到芯片内部的接收缓冲区。芯片会将内部的发送缓冲区中的帧注入到电缆上,这个过程也比较费时。芯片与电缆的数据交换速度受网卡制造工艺限制,一般的网卡也不过是百兆、千兆带宽而已。即使交换速度慢,但是如果应用程序收帧的速度没有帧从网络电缆抵达网卡的速度快的话,网卡内部的接收缓冲区就会逐渐变满而溢出,从而导致触发中断。同理,当网卡终于把内部发送缓冲区中的数据发出到网络电缆后,发送缓冲区变成空闲时,也会触发中断。

 

Ne2000以太网卡的小端口驱动提供了HandleReceive这个函数,用于从网卡内部的接收缓冲区中读出所有帧,提交给上层(实际上是提交给绑定着这块网卡的所有协议驱动)。HandleReceive这个函数内部使用了NdisMEthIndicateReceive宏完成提交工作。这个宏实际上调用了下面的函数来做提交工作:

// Indicates a received frame: forwards the header, the lookahead buffer and
// the packet size to MiniIndicateData for the miniport stored in the filter.
// The Address parameter is not used here.
VOID
EthFilterDprIndicateReceive(
    IN   PETH_FILTER Filter,
    IN   NDIS_HANDLE MacReceiveContext,
    IN   PCHAR       Address,
    IN   PVOID       HeaderBuffer,
    IN   UINT        HeaderBufferSize,
    IN   PVOID       LookaheadBuffer,
    IN   UINT        LookaheadBufferSize,
    IN   UINT        PacketSize)
{
    PLOGICAL_ADAPTER Miniport = (PLOGICAL_ADAPTER)((PETHI_FILTER)Filter)->Miniport;

    MiniIndicateData(Miniport,
                     MacReceiveContext,
                     HeaderBuffer,
                     HeaderBufferSize,
                     LookaheadBuffer,
                     LookaheadBufferSize,
                     PacketSize);
}

 

// Indicates (part of) a received frame to every protocol bound to the adapter.
//
// Adapter             - logical adapter the frame arrived on.
// MacReceiveContext   - passed through unchanged to each ReceiveHandler.
// HeaderBuffer        - frame header.
// HeaderBufferSize    - length of the frame header.
// LookaheadBuffer     - first bytes of the payload (the "lookahead" area).
// LookaheadBufferSize - length of the lookahead area.
// PacketSize          - total length of the payload.
VOID
MiniIndicateData(
    PLOGICAL_ADAPTER    Adapter,
    NDIS_HANDLE         MacReceiveContext,
    PVOID               HeaderBuffer,
    UINT                HeaderBufferSize,
    PVOID               LookaheadBuffer,
    UINT                LookaheadBufferSize,
    UINT                PacketSize)
{
    KIRQL            OldIrql;
    PLIST_ENTRY      CurrentEntry;
    PADAPTER_BINDING AdapterBinding;

    MiniDisplayPacket2(HeaderBuffer, HeaderBufferSize,
                       LookaheadBuffer, LookaheadBufferSize);

    // The binding list is walked with the miniport block lock held.
    KeAcquireSpinLock(&Adapter->NdisMiniportBlock.Lock, &OldIrql);

    // Visit every protocol binding attached to this adapter.
    for (CurrentEntry = Adapter->ProtocolListHead.Flink;
         CurrentEntry != &Adapter->ProtocolListHead;
         CurrentEntry = CurrentEntry->Flink)
    {
        AdapterBinding = CONTAINING_RECORD(CurrentEntry,
                                           ADAPTER_BINDING,
                                           AdapterListEntry);

        // Call the receive handler the protocol registered, handing it the
        // protocol's own binding context.
        (*AdapterBinding->ProtocolBinding->Chars.ReceiveHandler)(
            AdapterBinding->NdisOpenBlock.ProtocolBindingContext,
            MacReceiveContext,
            HeaderBuffer, HeaderBufferSize,
            LookaheadBuffer, LookaheadBufferSize,
            PacketSize);
    }

    KeReleaseSpinLock(&Adapter->NdisMiniportBlock.Lock, OldIrql);
}

如上,这个函数用来向上层绑定的所有协议提交帧(可能不是完整的帧)

 

 

前面说过了,当一个新的网卡插入机器时,ndis框架会通知所有现有的协议驱动进行绑定。当一个新的协议驱动安装加载到系统时,ndis框架也会让这个协议绑定现有的所有网卡(协议驱动与网卡之间必须绑定后才能通信)。当ndis框架通知协议驱动进行绑定时,会调用各协议驱动注册的绑定回调函数,在这个函数中,我们应该调用NdisOpenAdapter打开那个新插入到系统的网卡,进行绑定。

// Binds a protocol to an adapter.
//
// Status                 - receives the result of the operation.
// OpenErrorStatus        - not written by this simplified version.
// NdisBindingHandle      - receives the binding handle (the ADAPTER_BINDING
//                          allocated here).
// SelectedMediumIndex    - receives the index in MediumArray that matches the
//                          adapter's media type.
// MediumArray            - media types the protocol supports.
// MediumArraySize        - number of entries in MediumArray.
// NdisProtocolHandle     - the protocol doing the binding.
// ProtocolBindingContext - the protocol's own per-binding context.
// AdapterName            - name of the miniport device (the target NIC).
// OpenOptions, AddressingInformation - not used here.
//
// On failure *Status is set to NDIS_STATUS_ADAPTER_NOT_FOUND,
// NDIS_STATUS_UNSUPPORTED_MEDIA or NDIS_STATUS_RESOURCES and nothing is
// linked into the binding lists.
VOID
NdisOpenAdapter(
    OUT PNDIS_STATUS    Status,
    OUT PNDIS_STATUS    OpenErrorStatus,
    OUT PNDIS_HANDLE    NdisBindingHandle,
    OUT PUINT           SelectedMediumIndex,
    IN  PNDIS_MEDIUM    MediumArray,
    IN  UINT            MediumArraySize,
    IN  NDIS_HANDLE     NdisProtocolHandle,
    IN  NDIS_HANDLE     ProtocolBindingContext,
    IN  PNDIS_STRING    AdapterName,
    IN  UINT            OpenOptions,
    IN  PSTRING         AddressingInformation   OPTIONAL)
{
    UINT              i;
    BOOLEAN           Found;
    PLOGICAL_ADAPTER  Adapter;
    PADAPTER_BINDING  AdapterBinding;
    PPROTOCOL_BINDING Protocol = GET_PROTOCOL_BINDING(NdisProtocolHandle);

    // Look the target NIC up by name.
    Adapter = MiniLocateDevice(AdapterName);
    if (!Adapter)
    {
        // No such adapter: bail out before Adapter is dereferenced below.
        *Status = NDIS_STATUS_ADAPTER_NOT_FOUND;
        return;
    }

    // A protocol may support several media types (Ethernet, Token Ring, FDDI,
    // ATM, ...); find the one this adapter uses.
    Found = FALSE;
    for (i = 0; i < MediumArraySize; i++)
    {
        if (Adapter->NdisMiniportBlock.MediaType == MediumArray[i])
        {
            *SelectedMediumIndex = i;
            Found = TRUE;
            break;
        }
    }

    if (!Found)     // the protocol does not support this adapter's medium
    {
        *Status = NDIS_STATUS_UNSUPPORTED_MEDIA;
        return;
    }

    // Allocate the standard binding context (this is the binding handle).
    AdapterBinding = ExAllocatePool(NonPagedPool, sizeof(ADAPTER_BINDING));
    if (!AdapterBinding)
    {
        *Status = NDIS_STATUS_RESOURCES;
        return;
    }
    RtlZeroMemory(AdapterBinding, sizeof(ADAPTER_BINDING));

    // Record who is bound to whom.
    AdapterBinding->ProtocolBinding = Protocol;
    AdapterBinding->Adapter         = Adapter;

    // Key step: remember the protocol's own binding context inside the
    // standard binding context.
    AdapterBinding->NdisOpenBlock.ProtocolBindingContext = ProtocolBindingContext;
    AdapterBinding->NdisOpenBlock.BindingHandle          = (NDIS_HANDLE)AdapterBinding;

    // The Pro* routines are NDIS-internal.
    AdapterBinding->NdisOpenBlock.RequestHandler      = ProRequest;
    AdapterBinding->NdisOpenBlock.ResetHandler        = ProReset;
    AdapterBinding->NdisOpenBlock.SendHandler         = ProSend;
    AdapterBinding->NdisOpenBlock.SendPacketsHandler  = ProSendPackets;
    AdapterBinding->NdisOpenBlock.TransferDataHandler = ProTransferData;
    AdapterBinding->NdisOpenBlock.RequestCompleteHandler =
        Protocol->Chars.RequestCompleteHandler;

    // Link the binding into both the protocol's and the adapter's lists.
    ExInterlockedInsertTailList(&Protocol->AdapterListHead,
                                &AdapterBinding->ProtocolListEntry,
                                &Protocol->Lock);
    ExInterlockedInsertTailList(&Adapter->ProtocolListHead,
                                &AdapterBinding->AdapterListEntry,
                                &Adapter->NdisMiniportBlock.Lock);

    // Hand back the binding handle (the context allocated above).
    *NdisBindingHandle = (NDIS_HANDLE)AdapterBinding;
    *Status            = NDIS_STATUS_SUCCESS;
}

 

TCP/IP、IPX/SPX都是协议驱动,如今的时代,tcpip占据了市场主导地位,我们看下这个协议驱动(tcpip.sys)的部分实现

// Entry point of the tcpip protocol driver (simplified excerpt).
//
// Creates the IP, RawIp, Udp and Tcp device objects, sets up the global
// lists, locks and NDIS pools, starts the protocol layers, installs the
// dispatch routines, registers the loopback adapter and the NDIS protocol,
// and arms the periodic IP timer.
//
// NOTE(review): this excerpt does not check the results of IoCreateDevice,
// ExAllocatePoolWithTag, the NDIS pool allocations, LoopRegisterAdapter or
// LANRegisterProtocol; the device objects are dereferenced further down
// regardless, and STATUS_SUCCESS is always returned.
NTSTATUS  DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING RegistryPath)
{
  NTSTATUS       Status;
  // The string literals must use plain ASCII double quotes.
  UNICODE_STRING strIpDeviceName   = RTL_CONSTANT_STRING(L"\\Device\\Ip");
  UNICODE_STRING strRawDeviceName  = RTL_CONSTANT_STRING(L"\\Device\\RawIp");
  UNICODE_STRING strUdpDeviceName  = RTL_CONSTANT_STRING(L"\\Device\\Udp");
  UNICODE_STRING strTcpDeviceName  = RTL_CONSTANT_STRING(L"\\Device\\Tcp");
  UNICODE_STRING strNdisDeviceName = RTL_CONSTANT_STRING(L"Tcpip");
  NDIS_STATUS    NdisStatus;
  LARGE_INTEGER  DueTime;

  KeInitializeDpc(&IPTimeoutDpc, IPTimeoutDpcFn, NULL);
  KeInitializeTimer(&IPTimer);

  /* Create IP device object */
  Status = IoCreateDevice(DriverObject, 0, &strIpDeviceName,
                          FILE_DEVICE_NETWORK, 0, FALSE, &IPDeviceObject);
  ChewInit( IPDeviceObject );

  /* Create RawIP device object */
  Status = IoCreateDevice(DriverObject, 0, &strRawDeviceName,
                          FILE_DEVICE_NETWORK, 0, FALSE, &RawIPDeviceObject);

  /* Create UDP device object */
  Status = IoCreateDevice(DriverObject, 0, &strUdpDeviceName,
                          FILE_DEVICE_NETWORK, 0, FALSE, &UDPDeviceObject);

  /* Create TCP device object */
  Status = IoCreateDevice(DriverObject, 0, &strTcpDeviceName,
                          FILE_DEVICE_NETWORK, 0, FALSE, &TCPDeviceObject);

  /* Setup network layer and transport layer entities */
  KeInitializeSpinLock(&EntityListLock);
  // NOTE(review): ExAllocatePoolWithTag takes a third (Tag) argument that this
  // excerpt omits; the right tag is not visible here - confirm and add it.
  EntityList = ExAllocatePoolWithTag(NonPagedPool, sizeof(TDIEntityID) * MAX_TDI_ENTITIES);

  EntityCount = 0;
  EntityMax   = MAX_TDI_ENTITIES;

  // Global packet-descriptor pool.
  NdisAllocatePacketPool(&NdisStatus, &GlobalPacketPool, 100, sizeof(PACKET_CONTEXT));
  // Global buffer-descriptor pool.
  NdisAllocateBufferPool(&NdisStatus, &GlobalBufferPool, 100);

  // Address-file list.
  InitializeListHead(&AddressFileListHead);
  KeInitializeSpinLock(&AddressFileListLock);
  // Connection-endpoint list.
  InitializeListHead(&ConnectionEndpointListHead);
  KeInitializeSpinLock(&ConnectionEndpointListLock);
  // List of adapters (interfaces) bound to this protocol.
  InitializeListHead(&InterfaceListHead);
  KeInitializeSpinLock(&InterfaceListLock);

  IPStartup(RegistryPath);  // network layer
  RawIPStartup();           // RawIp
  UDPStartup();             // Udp
  TCPStartup();             // Tcp
  ICMPStartup();            // Icmp

  // Every protocol device object uses direct (MDL-based) I/O.
  IPDeviceObject->Flags    |= DO_DIRECT_IO;
  RawIPDeviceObject->Flags |= DO_DIRECT_IO;
  UDPDeviceObject->Flags   |= DO_DIRECT_IO;
  TCPDeviceObject->Flags   |= DO_DIRECT_IO;

  DriverObject->MajorFunction[IRP_MJ_CREATE]                  = TiDispatchOpenClose;
  DriverObject->MajorFunction[IRP_MJ_CLOSE]                   = TiDispatchOpenClose;
  DriverObject->MajorFunction[IRP_MJ_INTERNAL_DEVICE_CONTROL] = TiDispatchInternal;
  DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL]          = TiDispatch;
  DriverObject->DriverUnload = TiUnload;

  // Register the loopback adapter (127.0.0.1, a special virtual NIC).
  Status = LoopRegisterAdapter(NULL, NULL);

  // Key step: register this driver's protocol characteristics with NDIS.
  Status = LANRegisterProtocol(&strNdisDeviceName);

  // Arm the periodic IP timer: first expiry after IP_TIMEOUT milliseconds
  // (relative due time in 100 ns units), then every IP_TIMEOUT milliseconds.
  DueTime.QuadPart = -(LONGLONG)IP_TIMEOUT * 10000;
  KeSetTimerEx(&IPTimer, DueTime, IP_TIMEOUT, &IPTimeoutDpc);

  return STATUS_SUCCESS;
}

如上,这个协议驱动内部会创建一个网络层设备对象和三个传输层设备对象(RawIp也是传输层),这样,应用程序就可以直接打开这些设备,收发报文(不过,很少有应用程序这样做,一般都是通过socket间接打开这些设备进行通信的)。

下面的函数用来将tcpip.sys注册为一个协议驱动

// Registers tcpip as an NDIS protocol driver.
//
// Name - the protocol name to register under.
//
// Returns STATUS_SUCCESS when NdisRegisterProtocol succeeds (and sets
// ProtocolRegistered); otherwise returns the NDIS status cast to NTSTATUS.
NTSTATUS LANRegisterProtocol(PNDIS_STRING Name)
{
    NDIS_PROTOCOL_CHARACTERISTICS ProtChars;
    NDIS_STATUS                   NdisStatus;

    InitializeListHead(&AdapterListHead);
    KeInitializeSpinLock(&AdapterListLock);

    // Start from an all-zero characteristics block, then fill in what we use.
    RtlZeroMemory(&ProtChars, sizeof(NDIS_PROTOCOL_CHARACTERISTICS));

    // Version and protocol name.
    ProtChars.MajorNdisVersion   = NDIS_VERSION_MAJOR;
    ProtChars.MinorNdisVersion   = NDIS_VERSION_MINOR;
    ProtChars.Name.Length        = Name->Length;
    ProtChars.Name.Buffer        = Name->Buffer;
    ProtChars.Name.MaximumLength = Name->MaximumLength;

    // Completion callbacks.
    ProtChars.OpenAdapterCompleteHandler  = ProtocolOpenAdapterComplete;
    ProtChars.CloseAdapterCompleteHandler = ProtocolCloseAdapterComplete;
    ProtChars.ResetCompleteHandler        = ProtocolResetComplete;
    ProtChars.RequestCompleteHandler      = ProtocolRequestComplete;
    ProtChars.SendCompleteHandler         = ProtocolSendComplete;
    ProtChars.TransferDataCompleteHandler = ProtocolTransferDataComplete;

    // Receive path (ProtocolReceive is the important one).
    ProtChars.ReceiveHandler         = ProtocolReceive;
    ProtChars.ReceiveCompleteHandler = ProtocolReceiveComplete;

    // Status notifications.
    ProtChars.StatusHandler         = ProtocolStatus;
    ProtChars.StatusCompleteHandler = ProtocolStatusComplete;

    // Binding, PnP and unload.
    ProtChars.BindAdapterHandler   = ProtocolBindAdapter;
    ProtChars.PnPEventHandler      = ProtocolPnPEvent;
    ProtChars.UnbindAdapterHandler = ProtocolUnbindAdapter;
    ProtChars.UnloadHandler        = LANUnregisterProtocol;

    NdisRegisterProtocol(&NdisStatus,
                         &NdisProtocolHandle,
                         &ProtChars,
                         sizeof(NDIS_PROTOCOL_CHARACTERISTICS));

    if (NdisStatus == NDIS_STATUS_SUCCESS)
    {
        ProtocolRegistered = TRUE;
        return STATUS_SUCCESS;
    }

    return (NTSTATUS)NdisStatus;
}

 

 

当注册为协议驱动后,该协议驱动会打开所有现有适配器,进行绑定。一旦某个网卡收到了数据,触发中断,小端口驱动就会调用上层各绑定协议注册的接收函数,将收到的帧提交给它们。Tcpip协议驱动注册的接收函数是ProtocolReceive,我们看它是如何接收处理的。

// Receive handler registered by tcpip; called when a bound NIC indicates a
// frame.
//
// BindingContext      - the protocol's own binding context (a PLAN_ADAPTER).
// MacReceiveContext   - passed on to NdisTransferData.
// HeaderBuffer        - frame header.
// HeaderBufferSize    - length of the frame header.
// LookaheadBuffer     - lookahead part of the payload.
// LookaheadBufferSize - length of the lookahead part.
// PacketSize          - total length of the payload.
//
// Returns NDIS_STATUS_NOT_ACCEPTED when the adapter is not started, the
// header is too short, the medium is not 802.3, the EtherType is neither
// IPv4 nor ARP, or no packet could be allocated; NDIS_STATUS_SUCCESS
// otherwise.
NDIS_STATUS NTAPI ProtocolReceive(
    NDIS_HANDLE BindingContext,
    NDIS_HANDLE MacReceiveContext,
    PVOID       HeaderBuffer,
    UINT        HeaderBufferSize,
    PVOID       LookaheadBuffer,
    UINT        LookaheadBufferSize,
    UINT        PacketSize)
{
    PLAN_ADAPTER Adapter = (PLAN_ADAPTER)BindingContext;
    PETH_HEADER  EHeader = (PETH_HEADER)HeaderBuffer;
    USHORT       EType;
    UINT         PacketType;
    PNDIS_PACKET NdisPacket;
    NDIS_STATUS  NdisStatus;
    IP_PACKET    IPPacket;          // local scratch descriptor; not read again here
    PCHAR        BufferData;
    UINT         temp;
    UINT         BytesTransferred;

    if (Adapter->State != LAN_STATE_STARTED)
        return NDIS_STATUS_NOT_ACCEPTED;

    if (HeaderBufferSize < Adapter->HeaderSize)
        return NDIS_STATUS_NOT_ACCEPTED;

    if (Adapter->Media == NdisMedium802_3)
    {
        // Take the EtherType from the frame header before testing it
        // (it used to be compared while still uninitialized).
        EType = EHeader->EType;

        if ((EType != ETYPE_IPv4) && (EType != ETYPE_ARP))
            return NDIS_STATUS_NOT_ACCEPTED;    // other payloads are not supported yet

        PacketType = EType;                     // payload is an IP or ARP packet
    }
    else
    {
        return NDIS_STATUS_NOT_ACCEPTED;        // only Ethernet is supported for now
    }

    // Allocate a packet descriptor with a buffer big enough for the payload.
    NdisStatus = AllocatePacketWithBuffer( &NdisPacket, NULL, PacketSize );
    if (NdisStatus != NDIS_STATUS_SUCCESS)
        return NDIS_STATUS_NOT_ACCEPTED;        // nothing to receive into

    PC(NdisPacket)->PacketType = PacketType;

    IPPacket.NdisPacket = NdisPacket;
    IPPacket.Position   = 0;

    TransferDataCalled++;

    if (LookaheadBufferSize == PacketSize)
    {
        // The lookahead area already holds the complete payload: copy it.
        GetDataPtr( NdisPacket, 0, &BufferData, &temp );
        NdisCopyLookaheadData(BufferData, LookaheadBuffer, LookaheadBufferSize,
                              Adapter->MacOptions);
    }
    else
    {
        // Only part of the payload was indicated: hand our packet down to the
        // miniport and ask it to transfer the complete payload into it.
        NdisTransferData(&NdisStatus, Adapter->NdisHandle, MacReceiveContext,
                         0,
                         PacketSize,            // the whole payload
                         NdisPacket,            // container to fill
                         &BytesTransferred);    // receives the byte count transferred
    }

    // Unless the transfer is still pending, run the completion routine now.
    if (NdisStatus != NDIS_STATUS_PENDING)
        ProtocolTransferDataComplete(BindingContext, NdisPacket, NdisStatus, PacketSize);

    return NDIS_STATUS_SUCCESS;
}

 

当本次接收操作完成后,也即接收到一个完整的包后,就调用ProtocolTransferDataComplete进行处理,我们看具体是如何处理接收到的包的。(此处的包为IP/ARP报文,我们看它是如何处理的)

// Called when a payload transfer into Packet has completed.
//
// BindingContext   - the protocol's own binding context.
// Packet           - the packet that was filled.
// Status           - result of the transfer.
// BytesTransferred - number of bytes placed in the packet.
//
// Counts the call, and on success submits the packet for deferred processing.
VOID NTAPI ProtocolTransferDataComplete(
    NDIS_HANDLE  BindingContext,
    PNDIS_PACKET Packet,
    NDIS_STATUS  Status,
    UINT         BytesTransferred)
{
    TransferDataCompleteCalled++;

    if (Status == NDIS_STATUS_SUCCESS)
    {
        // The real work happens in LanSubmitReceiveWork.
        LanSubmitReceiveWork(BindingContext, Packet, Status, BytesTransferred);
    }
}

// Queues a received packet for processing by LanReceiveWorker.
//
// BindingContext   - the protocol's own binding context (a PLAN_ADAPTER).
// Packet           - the received IP/ARP packet.
// Status           - not used here.
// BytesTransferred - number of bytes in the packet.
//
// If the work-queue item cannot be allocated the function returns without
// queuing anything.
// NOTE(review): Packet is not released on that path - confirm who owns it.
VOID LanSubmitReceiveWork(
    NDIS_HANDLE  BindingContext,
    PNDIS_PACKET Packet,
    NDIS_STATUS  Status,
    UINT         BytesTransferred)
{
    PLAN_ADAPTER Adapter = (PLAN_ADAPTER)BindingContext;
    PLAN_WQ_ITEM WQItem;

    // Allocate with WQ_CONTEXT_TAG, the tag LanReceiveWorker frees the item with.
    WQItem = ExAllocatePoolWithTag(NonPagedPool, sizeof(LAN_WQ_ITEM), WQ_CONTEXT_TAG);
    if (!WQItem)
        return;

    WQItem->Packet           = Packet;
    WQItem->Adapter          = Adapter;
    WQItem->BytesTransferred = BytesTransferred;

    // Create a work item that runs LanReceiveWorker on WQItem.
    ChewCreate( LanReceiveWorker, WQItem );
}

我们看到,收到一个报文后,以工作项的形式进行处理,最后进入LanReceiveWorker这个函数进行接收处理

// Work-item routine that processes one received packet.
//
// Context - the PLAN_WQ_ITEM built by LanSubmitReceiveWork; it is freed here
//           once its fields have been copied out.
//
// IPv4/IPv6 packets go to IPReceive. ARP packets go to ARPReceive and are
// then released through IPPacket.Free, as are packets of any other type.
// The NDIS packet itself is always released at the end.
VOID LanReceiveWorker( PVOID Context )
{
    PLAN_WQ_ITEM WorkItem = (PLAN_WQ_ITEM)Context;
    PNDIS_PACKET Packet           = WorkItem->Packet;
    PLAN_ADAPTER Adapter          = WorkItem->Adapter;
    UINT         BytesTransferred = WorkItem->BytesTransferred;
    PNDIS_BUFFER NdisBuffer;
    IP_PACKET    IPPacket;
    UINT         PacketType;

    // Everything needed has been copied; the work item can go.
    ExFreePoolWithTag(WorkItem, WQ_CONTEXT_TAG);

    IPInitializePacket(&IPPacket, 0);
    IPPacket.NdisPacket = Packet;

    NdisGetFirstBufferFromPacket(Packet,
                                 &NdisBuffer,
                                 &IPPacket.Header,
                                 &IPPacket.ContigSize,
                                 &IPPacket.TotalSize);

    // Both sizes are overridden with the number of bytes actually transferred.
    IPPacket.ContigSize = IPPacket.TotalSize = BytesTransferred;
    PacketType          = PC(IPPacket.NdisPacket)->PacketType;
    IPPacket.Position   = 0;

    if (PacketType == ETYPE_IPv4 || PacketType == ETYPE_IPv6)
    {
        // Hand over to the IP layer.
        IPReceive(Adapter->Context, &IPPacket);
    }
    else
    {
        if (PacketType == ETYPE_ARP)
        {
            // Hand over to the ARP layer.
            ARPReceive(Adapter->Context, &IPPacket);
        }

        // ARP and unknown packet types are both released here.
        IPPacket.Free(&IPPacket);
    }

    FreeNdisPacket( Packet );
}

ARP的就不看了,看IP报文是如何在IP层接收的

// IP-layer entry point for a received packet.
//
// IF       - interface the packet arrived on.
// IPPacket - the received packet; it is released through IPPacket->Free
//            before returning, whatever its version.
//
// The version is taken from the high nibble of VerIHL. Version 4 packets are
// passed to IPv4Receive; version 6 packets are only tagged with their type.
VOID IPReceive( PIP_INTERFACE IF, PIP_PACKET IPPacket )
{
    UINT Version = (((PIPv4_HEADER)IPPacket->Header)->VerIHL >> 4);

    if (Version == 4)
    {
        IPPacket->Type = IP_ADDRESS_V4;
        IPv4Receive(IF, IPPacket);
    }
    else if (Version == 6)
    {
        IPPacket->Type = IP_ADDRESS_V6;
    }

    IPPacket->Free(IPPacket);
}

 

// Parses the IPv4 header of a received packet and passes the packet on to
// fragment processing.
//
// IF       - interface the packet arrived on.
// IPPacket - the received packet; HeaderSize, TotalSize, SrcAddr, DstAddr,
//            Position and Data are filled in from the header.
//
// Packets whose header length exceeds IPv4_MAX_HEADER_SIZE or whose header
// checksum is wrong are dropped (the function just returns).
VOID IPv4Receive(PIP_INTERFACE IF, PIP_PACKET IPPacket)
{
    PIPv4_HEADER Hdr = (PIPv4_HEADER)IPPacket->Header;

    // IHL is the low nibble of VerIHL, counted in 32-bit words.
    IPPacket->HeaderSize = (Hdr->VerIHL & 0x0F) << 2;

    if (IPPacket->HeaderSize > IPv4_MAX_HEADER_SIZE)
    {
        return;     // malformed packet: drop
    }

    if (!IPv4CorrectChecksum(IPPacket->Header, IPPacket->HeaderSize))
    {
        return;     // header checksum failed: drop
    }

    IPPacket->TotalSize = WN2H(Hdr->TotalLength);

    AddrInitIPv4(&IPPacket->SrcAddr, Hdr->SrcAddr);
    AddrInitIPv4(&IPPacket->DstAddr, Hdr->DstAddr);

    // The payload starts right behind the header.
    IPPacket->Position += IPPacket->HeaderSize;
    IPPacket->Data      = (PVOID)((ULONG_PTR)IPPacket->Header + IPPacket->HeaderSize);

    // The packet may be only a fragment; let ProcessFragment deal with it.
    ProcessFragment(IF, IPPacket);
}

ProcessFragment拼接成一个完整的数据报后,内部就会调用IPDispatchProtocol函数,将IP报文上交给上层相应的协议去接收处理(上层的协议可能是tcp、udp、icmp、igmp等协议)。

// Hands a complete IP packet to the handler of its upper-layer protocol.
//
// Interface - interface the packet came from.
// IPPacket  - the complete IP packet.
//
// Only IPv4 packets are dispatched; any other type returns immediately.
VOID IPDispatchProtocol(
    PIP_INTERFACE Interface,
    PIP_PACKET    IPPacket)
{
    PIPv4_HEADER Hdr;
    UINT         Protocol;
    IP_ADDRESS   SrcAddress;

    if (IPPacket->Type != IP_ADDRESS_V4)
    {
        return;     // IPv6 and unknown types are not handled
    }

    Hdr      = (PIPv4_HEADER)(IPPacket->Header);
    Protocol = Hdr->Protocol;                   // upper-layer protocol number
    AddrInitIPv4(&SrcAddress, Hdr->SrcAddr);

    NBResetNeighborTimeout(&SrcAddress);

    if (Protocol < IP_PROTOCOL_TABLE_SIZE)
    {
        // Key step: call the handler registered for this protocol number.
        (*ProtocolTable[Protocol])(Interface, IPPacket);
    }
}

 

Tcp协议的接收处理函数是TcpReceive,Udp协议的接收处理函数是UDPReceive,我们看看是如何接收、解析udp报文的。

// Receive handler for UDP datagrams.
//
// Interface - interface the packet arrived on (not used here).
// IPPacket  - IP packet whose Data points at the UDP header.
//
// Non-IPv4 packets, datagrams whose advertised length is impossible and
// datagrams that fail checksum verification are dropped silently. A valid
// datagram is delivered to every address file returned by the
// AddrSearchFirst/AddrSearchNext lookup on destination address and port.
VOID UDPReceive(PIP_INTERFACE Interface, PIP_PACKET IPPacket)
{
  AF_SEARCH     SearchContext;
  PIPv4_HEADER  IPv4Header;
  PADDRESS_FILE AddrFile;
  PUDP_HEADER   UDPHeader;
  PIP_ADDRESS   DstAddress, SrcAddress;
  UINT          DataSize, i;

  switch (IPPacket->Type)
  {
  case IP_ADDRESS_V4:
    IPv4Header = IPPacket->Header;
    DstAddress = &IPPacket->DstAddr;
    SrcAddress = &IPPacket->SrcAddr;
    break;

  case IP_ADDRESS_V6:
    return;

  default:
    return;
  }

  UDPHeader = (PUDP_HEADER)IPPacket->Data;

  // Validate the advertised datagram length first, so the checksum below is
  // never computed over more bytes than the packet holds.
  i = WH2N(UDPHeader->Length);      // i = total UDP datagram length
  if ((i < sizeof(UDP_HEADER)) || (i > IPPacket->TotalSize - IPPacket->Position))
    return;                         // malformed datagram: drop

  // A Checksum field of 0 means the sender did not compute one; otherwise the
  // computed value must match.
  if (UDPv4ChecksumCalculate(IPv4Header, (PUCHAR)UDPHeader, i) != DH2N(0x0000FFFF)
      && UDPHeader->Checksum != 0)
    return;                         // checksum failed: drop

  DataSize = i - sizeof(UDP_HEADER);    // payload length

  // Step over the UDP header to the payload.
  IPPacket->Data = (PVOID)((ULONG_PTR)IPPacket->Data + sizeof(UDP_HEADER));

  // Deliver to every matching address file (socket).
  for (AddrFile = AddrSearchFirst(DstAddress, UDPHeader->DestPort, IPPROTO_UDP, &SearchContext);
       AddrFile != NULL;
       AddrFile = AddrSearchNext(&SearchContext))
  {
    DGDeliverData(AddrFile,
                  SrcAddress, DstAddress,
                  UDPHeader->SourcePort, UDPHeader->DestPort,
                  IPPacket, DataSize);
  }
}

如上,udp层是怎么处理接收到的报文的呢?它先检查校验和,不正确的话就简单丢包(因此,udp协议不可靠)。然后,将这个udp报文投递给所有符合的socket(一个DstAddr、DstPort可能对应多个socket)。

DGDeliverData函数暂时就不看了。

总结一下:每当网卡收到一个包后的处理流程为:isr->dpc->工作项->各协议层的接收处理函数

 

下面我们看IP报文的发送过程:

// Sends a complete IP datagram to a neighbor.
//
// IPPacket - the complete IP packet.
// NCE      - the target neighbor, as chosen from the routing table.
// Complete - completion routine.
// Context  - context for the completion routine.
//
// Returns whatever SendFragments returns.
NTSTATUS IPSendDatagram(PIP_PACKET IPPacket,
                        PNEIGHBOR_CACHE_ENTRY NCE,
                        PIP_TRANSMIT_COMPLETE Complete, PVOID Context)
{
    NTSTATUS Result;

    // Sending goes through the fragmenting path, with the outgoing
    // interface's MTU as the size limit.
    Result = SendFragments(IPPacket, NCE, NCE->Interface->MTU, Complete, Context);

    return Result;
}

 

// Starts sending an IP datagram as one or more fragments.
//
// IPPacket - the complete IP packet.
// NCE      - the target neighbor.
// PathMTU  - maximum size of one fragment.
// Complete - completion routine, stored in the fragment context.
// Context  - context for the completion routine.
//
// Returns STATUS_INSUFFICIENT_RESOURCES when the fragment context cannot be
// allocated, the AllocatePacketWithBuffer status when the fragment packet
// cannot be allocated, and otherwise the result of IPSendFragment.
NTSTATUS SendFragments(
    PIP_PACKET            IPPacket,
    PNEIGHBOR_CACHE_ENTRY NCE,
    UINT                  PathMTU,
    PIP_TRANSMIT_COMPLETE Complete,
    PVOID                 Context)
{
    PIPFRAGMENT_CONTEXT IFC;
    NDIS_STATUS         NdisStatus;
    PVOID               Data;
    UINT                BufferSize = PathMTU, InSize;
    PCHAR               InData;

    GetDataPtr( IPPacket->NdisPacket, 0, &InData, &InSize );

    // A fragment is never larger than the MTU, nor larger than the data itself.
    if( InSize < BufferSize ) BufferSize = InSize;

    // IFC is the send context of one fragment.
    IFC = ExAllocatePoolWithTag(NonPagedPool, sizeof(IPFRAGMENT_CONTEXT), IFC_TAG);
    if (IFC == NULL)
        return STATUS_INSUFFICIENT_RESOURCES;

    // IFC->NdisPacket is the fragment packet.
    NdisStatus = AllocatePacketWithBuffer( &IFC->NdisPacket, NULL, BufferSize );
    if (NdisStatus != NDIS_STATUS_SUCCESS)
    {
        ExFreePoolWithTag(IFC, IFC_TAG);
        return NdisStatus;
    }

    GetDataPtr( IFC->NdisPacket, 0, (PCHAR *)&Data, &InSize );

    IFC->Header       = ((PCHAR)Data);
    IFC->Datagram     = IPPacket->NdisPacket;   // the datagram this fragment belongs to
    IFC->DatagramData = ((PCHAR)IPPacket->Header) + IPPacket->HeaderSize;
    IFC->HeaderSize   = IPPacket->HeaderSize;
    IFC->PathMTU      = PathMTU;
    IFC->NCE          = NCE;                    // target neighbor
    IFC->Position     = 0;
    IFC->BytesLeft    = IPPacket->TotalSize - IPPacket->HeaderSize;
    IFC->Data         = (PVOID)((ULONG_PTR)IFC->Header + IPPacket->HeaderSize);
    IFC->Complete     = Complete;
    IFC->Context      = Context;

    // Every fragment starts with a copy of the original IP header.
    RtlCopyMemory( IFC->Header, IPPacket->Header, IPPacket->HeaderSize );

    PrepareNextFragment(IFC);

    // Send the fragment to the target neighbor.
    NdisStatus = IPSendFragment(IFC->NdisPacket, NCE, IFC);

    return NdisStatus;
}

// Queues one fragment on the send queue of the given neighbor.
//
// NdisPacket - the fragment packet.
// NCE        - the target neighbor.
// IFC        - fragment context, passed to IPSendComplete as its context.
//
// Returns STATUS_SUCCESS when the packet was queued and STATUS_UNSUCCESSFUL
// otherwise. NBQueuePacket returns a BOOLEAN, so its result must not be
// returned directly: FALSE would read as STATUS_SUCCESS.
NTSTATUS IPSendFragment(
    PNDIS_PACKET          NdisPacket,
    PNEIGHBOR_CACHE_ENTRY NCE,
    PIPFRAGMENT_CONTEXT   IFC)
{
    if (!NBQueuePacket(NCE, NdisPacket, IPSendComplete, IFC))
        return STATUS_UNSUCCESSFUL;

    return STATUS_SUCCESS;
}

 

接着看:

// Appends a packet to a neighbor's send queue and, unless the neighbor is in
// the NUD_INCOMPLETE state, flushes the queue right away.
//
// NCE            - the target neighbor.
// NdisPacket     - the fragment packet to queue.
// PacketComplete - completion routine stored with the queued packet.
// PacketContext  - context stored with the queued packet.
//
// Returns TRUE when the packet was queued, FALSE when the queue entry could
// not be allocated.
BOOLEAN NBQueuePacket(
  PNEIGHBOR_CACHE_ENTRY     NCE,
  PNDIS_PACKET              NdisPacket,
  PNEIGHBOR_PACKET_COMPLETE PacketComplete,
  PVOID                     PacketContext)
{
  KIRQL            OldIrql;
  PNEIGHBOR_PACKET Packet;
  UINT             HashValue;

  // Entry that represents the packet on the neighbor's send queue.
  Packet = ExAllocatePoolWithTag( NonPagedPool, sizeof(NEIGHBOR_PACKET), NEIGHBOR_PACKET_TAG );
  if( !Packet ) return FALSE;

  // Fold the neighbor's address down to a NeighborCache bucket index; the
  // bucket's lock protects the queue.
  HashValue  = *(PULONG)(&NCE->Address.Address);
  HashValue ^= HashValue >> 16;
  HashValue ^= HashValue >> 8;
  HashValue ^= HashValue >> 4;
  HashValue &= NB_HASHMASK;

  TcpipAcquireSpinLock(&NeighborCache[HashValue].Lock, &OldIrql);

  Packet->Complete = PacketComplete;
  Packet->Context  = PacketContext;
  Packet->Packet   = NdisPacket;

  InsertTailList( &NCE->PacketQueue, &Packet->Next );

  TcpipReleaseSpinLock(&NeighborCache[HashValue].Lock, OldIrql);

  // Send immediately unless the neighbor is still incomplete.
  if( !(NCE->State & NUD_INCOMPLETE) )
      NBSendPackets( NCE );

  return TRUE;
}

 

// Drains a neighbor's send queue, handing each queued packet to the
// interface's Transmit routine.
//
// NCE - the neighbor whose queue is flushed.
VOID NBSendPackets( PNEIGHBOR_CACHE_ENTRY NCE )
{
    PLIST_ENTRY      Entry;
    PNEIGHBOR_PACKET Queued;
    UINT             Bucket;

    // Same address folding as in NBQueuePacket: selects the NeighborCache
    // bucket whose lock guards the queue.
    Bucket  = *(PULONG)(&NCE->Address.Address);
    Bucket ^= Bucket >> 16;
    Bucket ^= Bucket >> 8;
    Bucket ^= Bucket >> 4;
    Bucket &= NB_HASHMASK;

    // Pop entries one at a time until the queue is empty.
    for (;;)
    {
        Entry = ExInterlockedRemoveHeadList(&NCE->PacketQueue,
                                            &NeighborCache[Bucket].Lock);
        if (Entry == NULL)
        {
            break;
        }

        Queued = CONTAINING_RECORD( Entry, NEIGHBOR_PACKET, Next );

        // Completion of the link-level send is reported through
        // NBCompleteSend, with the queue entry as its context.
        PC(Queued->Packet)->DLComplete = NBCompleteSend;
        PC(Queued->Packet)->Context    = Queued;

        NCE->Interface->Transmit(
            NCE->Interface->Context,    // target adapter
            Queued->Packet,             // the fragment packet
            0,
            NCE->LinkAddress,           // link (MAC) address of the neighbor
            LAN_PROTO_IPv4 );
    }
}

实际上,Transmit最终调用了小端口驱动自身提供的发送函数,将包给网卡。至于小端口驱动是怎么发出去的?一般小端口驱动会检查网卡芯片内部的硬件发送缓冲区是否空闲,若是,就立即写入硬件发送缓冲区中,否则,小端口驱动内部也维护一个发送队列,将暂时不能发的包储存在那个队列中。

 

邻接点是什么?邻接点就是发往目标机器的中途路径上的一台机器,一般就是默认网关。如果本机装了多个网卡,tcpip就会自动根据主机的路由表选择路由,将帧通过合适的网卡发给合适的邻接点。

 

 

Tcpip.sys内部创建了好几个设备对象,应用程序可以直接打开那些设备收发报文,不过,并不是通过IRP_MJ_READ、IRP_MJ_WRITE来收发报文的,而是通过IRP_MJ_INTERNAL_DEVICE_CONTROL进行。但是这样很麻烦,不好控制,应用程序一般借助socket来收发报文(驱动型木马往往直接使用IRP_MJ_INTERNAL_DEVICE_CONTROL来收发报文,悄悄进行网络通信)。

Windows中的socket机制不同于unix,Windows中的socket api并不是系统调用,它的实现机制分为用户空间和内核空间。用户空间便是ws2_32.dll,内核空间便是afd.sys这个‘通用socket驱动’。为什么说是通用的呢?因为socket分为好几种:tcpip、ipx、netbios、AppleTalk等等。Afd驱动下层可以搭配任意协议驱动,只要那个协议驱动对afd提供tdi接口即可。在Windows中,各种协议驱动又叫服务提供者,afd驱动又叫服务使用者。下面我们看afd的DriverEntry:

// Entry point of the afd socket driver (simplified excerpt).
//
// Routes CLOSE, CREATE, CLEANUP, WRITE, READ and DEVICE_CONTROL requests to
// AfdDispatch, creates the \Device\Afd device object and initializes the
// lock and poll list in its device extension.
//
// Returns the IoCreateDevice status; on failure the device extension is not
// touched.
NTSTATUS  DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING RegistryPath)
{
    PDEVICE_OBJECT        DeviceObject;
    // The string literal must use plain ASCII double quotes.
    UNICODE_STRING        wstrDeviceName = RTL_CONSTANT_STRING(L"\\Device\\Afd");
    PAFD_DEVICE_EXTENSION DeviceExt;
    NTSTATUS              Status;

    // Every handled major function goes through AfdDispatch.
    DriverObject->MajorFunction[IRP_MJ_CLOSE]          = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_CREATE]         = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_CLEANUP]        = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_WRITE]          = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_READ]           = AfdDispatch;
    DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = AfdDispatch;
    DriverObject->DriverUnload = AfdUnload;

    // Create the afd socket-driver device object.
    Status = IoCreateDevice( DriverObject, sizeof(AFD_DEVICE_EXTENSION), &wstrDeviceName,
                             FILE_DEVICE_NAMED_PIPE, 0, FALSE, &DeviceObject );
    if (!NT_SUCCESS(Status))
        return Status;      // DeviceObject is not valid; do not dereference it

    DeviceExt = DeviceObject->DeviceExtension;
    KeInitializeSpinLock( &DeviceExt->Lock );
    InitializeListHead( &DeviceExt->Polls );

    return (Status);
}

Socket api在内部转换成socket irp发给afd设备,下面看看afd驱动是如何处理各种socket irp的:

// Single dispatch routine of the afd driver.
//
// DeviceObject - the afd device object.
// Irp          - the request.
//
// Requests that have a handler are passed to it and the handler's status is
// returned directly. Everything else is completed here: the three IOCTLs that
// only print an "UNIMPLEMENTED" message and unknown major functions complete
// with STATUS_NOT_IMPLEMENTED, unknown IOCTLs with STATUS_NOT_SUPPORTED.
NTSTATUS AfdDispatch(PDEVICE_OBJECT DeviceObject, PIRP Irp)
{
    PIO_STACK_LOCATION IrpSp  = IoGetCurrentIrpStackLocation(Irp);
    NTSTATUS           Status = STATUS_NOT_IMPLEMENTED;

    Irp->IoStatus.Information = 0;

    switch(IrpSp->MajorFunction)
    {
    case IRP_MJ_CREATE:
        return AfdCreateSocket(DeviceObject, Irp, IrpSp);

    case IRP_MJ_CLOSE:
        return AfdCloseSocket(DeviceObject, Irp, IrpSp);

    case IRP_MJ_CLEANUP:
        return AfdCleanupSocket(DeviceObject, Irp, IrpSp);

    case IRP_MJ_WRITE:
        return AfdConnectedSocketWriteData( DeviceObject, Irp, IrpSp, TRUE );

    case IRP_MJ_READ:
        return AfdConnectedSocketReadData( DeviceObject, Irp, IrpSp, TRUE );

    case IRP_MJ_DEVICE_CONTROL:
    {
        switch( IrpSp->Parameters.DeviceIoControl.IoControlCode ) {
        case IOCTL_AFD_BIND:
            return AfdBindSocket( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_CONNECT:
            return AfdStreamSocketConnect( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_START_LISTEN:
            return AfdListenSocket( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_RECV:
            return AfdConnectedSocketReadData( DeviceObject, Irp, IrpSp, FALSE );
        case IOCTL_AFD_SELECT:
            return AfdSelect( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_EVENT_SELECT:
            return AfdEventSelect( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_ENUM_NETWORK_EVENTS:
            return AfdEnumEvents( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_RECV_DATAGRAM:
            return AfdPacketSocketReadData( DeviceObject, Irp, IrpSp );

        case IOCTL_AFD_SEND:
            return AfdConnectedSocketWriteData( DeviceObject, Irp, IrpSp, FALSE );
        case IOCTL_AFD_SEND_DATAGRAM:
            return AfdPacketSocketWriteData( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_GET_INFO:
            return AfdGetInfo( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_SET_INFO:
            return AfdSetInfo( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_GET_CONTEXT_SIZE:
            return AfdGetContextSize( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_GET_CONTEXT:
            return AfdGetContext( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_SET_CONTEXT:
            return AfdSetContext( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_WAIT_FOR_LISTEN:
            return AfdWaitForListen( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_ACCEPT:
            return AfdAccept( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_DISCONNECT:
            return AfdDisconnect( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_GET_SOCK_NAME:
            return AfdGetSockName( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_GET_PEER_NAME:
            return AfdGetPeerName( DeviceObject, Irp, IrpSp );
        case IOCTL_AFD_GET_CONNECT_DATA:
            return AfdGetConnectData(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_SET_CONNECT_DATA:
            return AfdSetConnectData(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_SET_DISCONNECT_DATA:
            return AfdSetDisconnectData(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_GET_DISCONNECT_DATA:
            return AfdGetDisconnectData(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_SET_CONNECT_DATA_SIZE:
            return AfdSetConnectDataSize(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_SET_DISCONNECT_DATA_SIZE:
            return AfdSetDisconnectDataSize(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_SET_CONNECT_OPTIONS:
            return AfdSetConnectOptions(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_SET_DISCONNECT_OPTIONS:
            return AfdSetDisconnectOptions(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_GET_CONNECT_OPTIONS:
            return AfdGetConnectOptions(DeviceObject, Irp, IrpSp);

        case IOCTL_AFD_GET_DISCONNECT_OPTIONS:
            return AfdGetDisconnectOptions(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_SET_CONNECT_OPTIONS_SIZE:
            return AfdSetConnectOptionsSize(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_SET_DISCONNECT_OPTIONS_SIZE:
            return AfdSetDisconnectOptionsSize(DeviceObject, Irp, IrpSp);
        case IOCTL_AFD_GET_TDI_HANDLES:
            return AfdGetTdiHandles(DeviceObject, Irp, IrpSp);

        // The next three are not implemented: log it and complete the IRP
        // below with the initial STATUS_NOT_IMPLEMENTED.
        case IOCTL_AFD_DEFER_ACCEPT:
            DbgPrint("IOCTL_AFD_DEFER_ACCEPT is UNIMPLEMENTED!\n");
            break;
        case IOCTL_AFD_GET_PENDING_CONNECT_DATA:
            DbgPrint("IOCTL_AFD_GET_PENDING_CONNECT_DATA is UNIMPLEMENTED!\n");
            break;
        case IOCTL_AFD_VALIDATE_GROUP:
            DbgPrint("IOCTL_AFD_VALIDATE_GROUP is UNIMPLEMENTED!\n");
            break;

        default:
            Status = STATUS_NOT_SUPPORTED;
            break;
        }
        break;
    }

    default:
    {
        Status = STATUS_NOT_IMPLEMENTED;
        break;
    }
    }

    // Only requests that were not handed to a handler reach this point.
    Irp->IoStatus.Status = Status;
    IoCompleteRequest(Irp, IO_NO_INCREMENT);

    return (Status);
}

 

实际上,应用程序可以直接打开这个套接字驱动设备对象进行通信,但是应用程序很少这样做,因为不方便。为此,微软为afd套接字驱动提供了用户空间匹配的模块ws2_32.dll,通过socket api来间接打开afd设备与afd驱动进行交互。Socket api除了方便外,另一个好处便是兼容unix,可移植。我们看下ws2_32.dll的DllMain:

// DLL entry point of ws2_32.dll (simplified excerpt).
//
// hInstDll   - module handle, remembered in g_hInstDll on process attach.
// dwReason   - DLL_PROCESS_ATTACH / DLL_THREAD_ATTACH / DLL_PROCESS_DETACH /
//              DLL_THREAD_DETACH.
// lpReserved - not used here.
//
// Returns FALSE when the per-thread Winsock block cannot be allocated,
// TRUE otherwise.
BOOL
DllMain(HANDLE hInstDll,
        ULONG  dwReason,
        LPVOID lpReserved)
{
    PWINSOCK_THREAD_BLOCK p;

    switch (dwReason)
    {
        case DLL_PROCESS_ATTACH:
        {
            GlobalHeap = GetProcessHeap();
            g_hInstDll = hInstDll;

            CreateCatalog();
            InitProviderHandleTable();  // set up the provider handle table

            UpcallTable.lpWPUCloseEvent               = WPUCloseEvent;
            UpcallTable.lpWPUCloseSocketHandle        = WPUCloseSocketHandle;
            UpcallTable.lpWPUCreateEvent              = WPUCreateEvent;
            UpcallTable.lpWPUCreateSocketHandle       = WPUCreateSocketHandle;
            UpcallTable.lpWPUFDIsSet                  = WPUFDIsSet;
            UpcallTable.lpWPUGetProviderPath          = WPUGetProviderPath;
            UpcallTable.lpWPUModifyIFSHandle          = WPUModifyIFSHandle;
            UpcallTable.lpWPUPostMessage              = PostMessageW;
            UpcallTable.lpWPUQueryBlockingCallback    = WPUQueryBlockingCallback;
            UpcallTable.lpWPUQuerySocketHandleContext = WPUQuerySocketHandleContext;
            UpcallTable.lpWPUQueueApc                 = WPUQueueApc;
            UpcallTable.lpWPUResetEvent               = WPUResetEvent;
            UpcallTable.lpWPUSetEvent                 = WPUSetEvent;
            UpcallTable.lpWPUOpenCurrentThread        = WPUOpenCurrentThread;
            UpcallTable.lpWPUCloseThread              = WPUCloseThread;
        }
        /* fall through: the attaching thread needs its own block as well */

        case DLL_THREAD_ATTACH:
        {
            // Every thread gets its own Winsock information block.
            p = HeapAlloc(GlobalHeap, 0, sizeof(WINSOCK_THREAD_BLOCK));
            if (!p)
                return FALSE;   // nothing to initialize; never dereference NULL

            p->Hostent        = NULL;
            p->LastErrorValue = NO_ERROR;   // per-thread socket last-error value
            p->Getservbyname  = NULL;
            p->Getservbyport  = NULL;

            NtCurrentTeb()->WinSockData = p;
        }
        break;

        case DLL_PROCESS_DETACH:
        {
            DestroyCatalog();
            FreeProviderHandleTable();
        }
        break;

        case DLL_THREAD_DETACH:
        {
            // Release this thread's block, if it has one.
            p = NtCurrentTeb()->WinSockData;

            if (p)
              HeapFree(GlobalHeap, 0, p);
        }
        break;
    }

    return TRUE;
}

 

Socket的创建:

// Creates a socket.
//
// af       - address family.
// type     - socket type (datagram / stream).
// protocol - protocol.
//
// Thin wrapper: forwards to WSASocketW with no protocol info, group 0 and
// flags 0, and returns its result.
SOCKET
socket(IN  INT af,
       IN  INT type,
       IN  INT protocol)
{
    SOCKET Created;

    Created = WSASocketW(af, type, protocol, NULL, 0, 0);

    return Created;
}

 

// Creates a socket through the matching service provider.
//
// af, type, protocol - address family, socket type and protocol.
// lpProtocolInfo     - optional protocol description; when NULL one is built
//                      from af/type/protocol.
// g, dwFlags         - passed through to the provider.
//
// Returns the socket handle, or INVALID_SOCKET with the last error set when
// Winsock is not initialized (WSANOTINITIALISED), no provider matches
// (WSAEAFNOSUPPORT), the provider cannot be loaded, or the provider fails to
// create the socket.
SOCKET
WSASocketW(IN  INT af, IN  INT type, IN  INT protocol,
           IN  LPWSAPROTOCOL_INFOW lpProtocolInfo, IN  GROUP g, IN  DWORD dwFlags)
{
    INT               Status;
    SOCKET            Socket;
    PCATALOG_ENTRY    Provider;
    WSAPROTOCOL_INFOW ProtocolInfo;

    if (!WSAINITIALIZED)
    {
        WSASetLastError(WSANOTINITIALISED);
        return INVALID_SOCKET;
    }

    if (!lpProtocolInfo)
    {
        // No description supplied: describe the request ourselves.
        lpProtocolInfo = &ProtocolInfo;
        ZeroMemory(&ProtocolInfo, sizeof(WSAPROTOCOL_INFOW));

        ProtocolInfo.iAddressFamily = af;
        ProtocolInfo.iSocketType    = type;
        ProtocolInfo.iProtocol      = protocol;
    }

    // Find the matching service provider.
    Provider = LocateProvider(lpProtocolInfo);
    if (!Provider)
    {
        WSASetLastError(WSAEAFNOSUPPORT);
        return INVALID_SOCKET;
    }

    // Load the service provider; its entry points are unusable if this fails.
    Status = LoadProvider(Provider, lpProtocolInfo);
    if (Status != NO_ERROR)
    {
        WSASetLastError(Status);
        return INVALID_SOCKET;
    }

    // Call the provider's socket-creation routine (WSPSocket for tcpip).
    Socket = Provider->ProcTable.lpWSPSocket(af, type, protocol, lpProtocolInfo,
                                             g, dwFlags, &Status);
    if (Socket == INVALID_SOCKET)
    {
        // The provider reports its error code through the last argument.
        WSASetLastError(Status);
    }

    return Socket;  // the socket handle
}

 

SOCKET

WSPSocket(int AddressFamily,int SocketType,int Protocol,

          LPWSAPROTOCOL_INFOW
lpProtocolInfo,GROUP
g,

          DWORD dwFlags,LPINT lpErrno)

{

    PSOCKET_INFORMATION         Socket
= NULL;

PFILE_FULL_EA_INFORMATION  
EABuffer = NULL;

//根据该套接字的家族、类型、协议  匹配决定出要使用哪种下层协议驱动和传输层设备对象,返回到TransportName参数中

    Status
= SockGetTdiName (&AddressFamily,&SocketType,&Protocol,g,dwFlags,

                             &TransportName,//OUT

                             &HelperDLLContext,// OUT

                             &HelperData,//OUT

                             &HelperEvents);//OUT

    RtlInitUnicodeString(&DevName, L”\\Device\\Afd\\Endpoint”);//端点管理设备

    Socket
= HeapAlloc(GlobalHeap,
0, sizeof(*Socket));//socket信息,将会加入全局链表进行维护

    RtlZeroMemory(Socket, sizeof(*Socket));

    Socket->RefCount = 2;

    Socket->Handle = -1;//无效句柄

    Socket->SharedData.Listening
= FALSE;

    Socket->SharedData.State = SocketOpen;

    Socket->SharedData.AddressFamily
= AddressFamily;

    Socket->SharedData.SocketType
= SocketType;

    Socket->SharedData.Protocol
= Protocol;

    Socket->HelperContext = HelperDLLContext;

    Socket->HelperData = HelperData;

    Socket->HelperEvents = HelperEvents;

    Socket->LocalAddress = &Socket->WSLocalAddress;

    Socket->SharedData.SizeOfLocalAddress
= HelperData->MaxWSAddressLength;

    Socket->RemoteAddress = &Socket->WSRemoteAddress;

    Socket->SharedData.SizeOfRemoteAddress
= HelperData->MaxWSAddressLength;

    Socket->SharedData.UseDelayedAcceptance
= HelperData->UseDelayedAcceptance;

    Socket->SharedData.CreateFlags
= dwFlags;

    Socket->SharedData.CatalogEntryId
= lpProtocolInfo->dwCatalogEntryId;

    Socket->SharedData.ServiceFlags1
= lpProtocolInfo->dwServiceFlags1;

    Socket->SharedData.ProviderFlags
= lpProtocolInfo->dwProviderFlags;

    Socket->SharedData.GroupID
= g;

    Socket->SharedData.GroupType
= 0;

    Socket->SharedData.UseSAN =
FALSE;

    Socket->SharedData.NonBlocking
= FALSE;//默认为阻塞方式

    Socket->SanData = NULL;

    if(
Socket->SharedData.SocketType == SOCK_DGRAM
||

Socket->SharedData.SocketType == SOCK_RAW
)

    {

        Socket->SharedData.ServiceFlags1
|= XP1_CONNECTIONLESS;

    }

    SizeOfPacket
= TransportName.Length
+ sizeof(AFD_CREATE_PACKET)
+ sizeof(WCHAR);

    SizeOfEA
= sizeof(FILE_FULL_EA_INFORMATION)
+ AFD_PACKET_COMMAND_LENGTH + SizeOfPacket;

    EABuffer
= HeapAlloc(GlobalHeap,
0, SizeOfEA);//EA附加属性就是一个AFD_CREATE_PACKET结构体

    RtlZeroMemory(EABuffer, SizeOfEA);

    EABuffer->NextEntryOffset = 0;

    EABuffer->Flags = 0;

    EABuffer->EaNameLength = AFD_PACKET_COMMAND_LENGTH;

    RtlCopyMemory
(EABuffer->EaName,
AfdCommand, AFD_PACKET_COMMAND_LENGTH
+ 1);

    EABuffer->EaValueLength = SizeOfPacket;

    AfdPacket
= (PAFD_CREATE_PACKET)(EABuffer->EaName + EABuffer->EaNameLength
+ 1);

AfdPacket->SizeOfTransportName
= TransportName.Length;

//记录该套接字下层使用的传输层设备对象名

    RtlCopyMemory
(AfdPacket->TransportName,TransportName.Buffer,

TransportName.Length
+ sizeof(WCHAR));

    AfdPacket->GroupID = g;

    if
((Socket->SharedData.ServiceFlags1 & XP1_CONNECTIONLESS)
!= 0)

    {

        if
((SocketType != SOCK_DGRAM)
&& (SocketType != SOCK_RAW))

            goto
error;

        AfdPacket->EndpointFlags |= AFD_ENDPOINT_CONNECTIONLESS;

    }

    if
((Socket->SharedData.ServiceFlags1 & XP1_MESSAGE_ORIENTED)
!= 0)

    {

        if
(SocketType == SOCK_STREAM)

        {

            if
((Socket->SharedData.ServiceFlags1 & XP1_PSEUDO_STREAM)
== 0)

                goto error;

        }

        AfdPacket->EndpointFlags |= AFD_ENDPOINT_MESSAGE_ORIENTED;

    }

    if
(SocketType == SOCK_RAW)
AfdPacket->EndpointFlags
|= AFD_ENDPOINT_RAW;

 

InitializeObjectAttributes (&Object,&DevName,OBJ_CASE_INSENSITIVE
| OBJ_INHERIT,0,0);

//关键。打开afd驱动中的设备,创建一个套接字文件对象,返回套接字句柄到Sock参数中

    Status
= NtCreateFile(&Sock,GENERIC_READ | GENERIC_WRITE
| SYNCHRONIZE,&Object,

                          &IOSB,NULL,0,FILE_SHARE_READ
| FILE_SHARE_WRITE,

                          FILE_OPEN_IF,0,EABuffer,SizeOfEA);

    HeapFree(GlobalHeap, 0, EABuffer);

    Socket->Handle = (SOCKET)Sock;//记录句柄

if (g !=
0) …

//即FCB->Send.Size:该套接字的UDP发送缓冲区大小,默认为16384B

    GetSocketInformation
(Socket,AFD_INFO_SEND_WINDOW_SIZE,

                          &Socket->SharedData.SizeOfSendBuffer,NULL);

    //即FCB->Recv.Size:该套接字的UDP接收缓冲区大小,默认为16384B

    GetSocketInformation
(Socket,AFD_INFO_RECEIVE_WINDOW_SIZE,

                          &Socket->SharedData.SizeOfRecvBuffer,NULL);

    EnterCriticalSection(&SocketListLock);

    Socket->NextSocket = SocketListHead;

    SocketListHead
= Socket;//将新创建的套接字加入全局链表

    LeaveCriticalSection(&SocketListLock);

    CreateContext(Socket);

    Upcalls.lpWPUModifyIFSHandle(1, (SOCKET)Sock, lpErrno);

    return
(SOCKET)Sock;//返回套接字句柄

error:…

}

 

实际上,上面的函数会调用NtCreateFile打开设备,创建一个套接字文件对象,然后返回该文件对象的句柄(即套接字句柄)给用户。NtCreateFile内部在IopParseDevice中会创建一个文件对象,然后,会打开目标afd设备,生成IRP_MJ_CREATE发给目标设备,最终进入AfdDispatch这个派遣例程中处理该irp,前面看到,具体处理这种IRP的是下面的函数

/*
 * AfdCreateSocket - AFD handler for IRP_MJ_CREATE on the socket device.
 *
 * Allocates an AFD_FCB for the new socket file object, stores it in
 * FileObject->FsContext, copies the endpoint flags, group id and transport
 * device name out of the AFD_CREATE_PACKET passed as extended attributes, and
 * initialises the per-socket queues. The IRP is completed here.
 *
 * Returns STATUS_NO_MEMORY if either pool allocation fails; otherwise the
 * value of Status at the end of the function.
 */
NTSTATUS
AfdCreateSocket(PDEVICE_OBJECT DeviceObject, PIRP Irp,PIO_STACK_LOCATION IrpSp)
{
    PAFD_FCB                  FCB;
    PFILE_OBJECT              FileObject;
    PAFD_DEVICE_EXTENSION     DeviceExt;
    PFILE_FULL_EA_INFORMATION EaInfo;
    PAFD_CREATE_PACKET        ConnectInfo = NULL;
    ULONG                     EaLength;
    PWCHAR                    EaInfoValue = NULL;
    UINT                      Disposition, i;
    NTSTATUS                  Status = STATUS_SUCCESS;

    DeviceExt = DeviceObject->DeviceExtension;
    FileObject = IrpSp->FileObject;
    Disposition = (IrpSp->Parameters.Create.Options >> 24) & 0xff;

    Irp->IoStatus.Information = 0;

    /* For IRP_MJ_CREATE the SystemBuffer holds the extended attributes. */
    EaInfo = Irp->AssociatedIrp.SystemBuffer;

    if( EaInfo )
    {
        /* The value (an AFD_CREATE_PACKET) follows the NUL-terminated EA name. */
        ConnectInfo = (PAFD_CREATE_PACKET)(EaInfo->EaName + EaInfo->EaNameLength + 1);
        EaInfoValue = (PWCHAR)(((PCHAR)ConnectInfo) + sizeof(AFD_CREATE_PACKET));
        EaLength = sizeof(FILE_FULL_EA_INFORMATION) + EaInfo->EaNameLength + EaInfo->EaValueLength;
    }

    /* Allocate the socket FCB that records this socket file object's state. */
    FCB = ExAllocatePool(NonPagedPool, sizeof(AFD_FCB));
    if( FCB == NULL )
    {
        /* Nothing has been attached to the file object yet; just fail the IRP. */
        Irp->IoStatus.Status = STATUS_NO_MEMORY;
        IoCompleteRequest( Irp, IO_NETWORK_INCREMENT );
        return STATUS_NO_MEMORY;
    }
    RtlZeroMemory( FCB, sizeof( *FCB ) );

    /* Key step: the file object's FsContext points at the socket FCB. */
    FileObject->FsContext = FCB;

    FCB->Flags = ConnectInfo ? ConnectInfo->EndpointFlags : 0;
    FCB->GroupID = ConnectInfo ? ConnectInfo->GroupID : 0;
    FCB->State = SOCKET_STATE_CREATED;
    FCB->FileObject = FileObject;   /* owning file object */
    FCB->DeviceExt = DeviceExt;     /* extension of the AFD device */
    FCB->AddressFile.Handle = INVALID_HANDLE_VALUE; /* no local address bound yet */
    FCB->Connection.Handle = INVALID_HANDLE_VALUE;

    KeInitializeMutex( &FCB->Mutex, 0 );

    /* Six pending-IRP queues (author's note: used for asynchronous operation). */
    for( i = 0; i < 6; i++ )
        InitializeListHead( &FCB->PendingIrpList[i] );

    InitializeListHead( &FCB->DatagramList );       /* queue of received datagrams */
    /* NOTE(review): the author labels this the TCP receive queue; the field
     * name suggests pending incoming connections - confirm. */
    InitializeListHead( &FCB->PendingConnections );

    if( ConnectInfo )
    {
        /* Keep a private copy of the transport device name. */
        FCB->TdiDeviceName.Length = ConnectInfo->SizeOfTransportName;
        FCB->TdiDeviceName.MaximumLength = FCB->TdiDeviceName.Length;
        FCB->TdiDeviceName.Buffer = ExAllocatePool( NonPagedPool, FCB->TdiDeviceName.Length );
        if( !FCB->TdiDeviceName.Buffer )
        {
            /* Undo the FCB attachment before failing the create. */
            FileObject->FsContext = NULL;
            ExFreePool( FCB );
            Irp->IoStatus.Status = STATUS_NO_MEMORY;
            IoCompleteRequest( Irp, IO_NETWORK_INCREMENT );
            return STATUS_NO_MEMORY;
        }
        RtlCopyMemory( FCB->TdiDeviceName.Buffer, ConnectInfo->TransportName,
                       FCB->TdiDeviceName.Length );
    }

    if( FCB->Flags & AFD_ENDPOINT_CONNECTIONLESS )
    {
        /* A connectionless socket is marked writable from the start. */
        FCB->PollState |= AFD_EVENT_SEND;
        FCB->PollStatus[FD_WRITE_BIT] = STATUS_SUCCESS;
        PollReeval( FCB->DeviceExt, FCB->FileObject );
    }

    if( !NT_SUCCESS(Status) ) 。。。

    Irp->IoStatus.Status = Status;
    IoCompleteRequest( Irp, IO_NETWORK_INCREMENT );

    return Status;
}

如上,每当创建一个套接字的时候,就为其准备6个irp队列(PendingIrpList)、一个udp收包队列(DatagramList)、一个tcp连接队列(PendingConnections),并分配关联一个FCB记录其他方面的套接字信息。

Afd相关的概念有:套接字驱动、套接字设备、套接字文件、套接字FCB

套接字驱动:afd.sys

套接字设备:\Device\Afd\Endpoint

套接字文件对象:每打开一次套接字设备生成一个套接字文件对象

套接字FCB:每个套接字文件对象关联的FCB

 

套接字创建完毕后,还需要绑定IP地址与端口号。注意套接字是afd驱动中的概念和术语,传输层并没有这种说法,传输层中对应会创建一个地址对象,来表示afd中的socket。Afd中的socket绑定的就是传输层中的地址对象。传输层中有一个地址对象列表,维护记录着所有创建的地址对象。下面看一下套接字的绑定过程:“创绑地址文件”(一个地址文件就代表一个地址对象)。

/*
 * bind - associate a local address with a socket.
 *
 *   s        socket handle
 *   name     address to bind to
 *   namelen  size of *name in bytes
 *
 * Looks up the service provider that owns the socket and forwards to its
 * lpWSPBind entry (WSPBind for tcpip). Returns the provider's result; on
 * SOCKET_ERROR the thread's last error is set to the provider's error code.
 */
INT
bind(IN SOCKET s,
     IN CONST struct sockaddr *name,
     IN INT namelen)
{
    PCATALOG_ENTRY Provider = NULL;
    INT            Status;
    INT            Errno;

    if (!WSAINITIALIZED)
    {
        WSASetLastError(WSANOTINITIALISED);
        return SOCKET_ERROR;
    }

    /* Find the provider that owns this socket. Provider starts as NULL so an
     * unknown handle is detected instead of dereferencing an uninitialised
     * pointer.
     * NOTE(review): relies on ReferenceProviderByHandle leaving *Provider
     * untouched on failure - confirm. */
    ReferenceProviderByHandle((HANDLE)s, &Provider);
    if (!Provider)
    {
        WSASetLastError(WSAENOTSOCK);
        return SOCKET_ERROR;
    }

    /* For tcpip, lpWSPBind is WSPBind. */
    Status = Provider->ProcTable.lpWSPBind(s, name, namelen, &Errno);

    DereferenceProviderByPointer(Provider);

    if (Status == SOCKET_ERROR)
        WSASetLastError(Errno);

    return Status;
}

 

 

/*
 * WSPBind - provider-side bind: send IOCTL_AFD_BIND to the socket's AFD handle.
 *
 *   Handle               socket handle
 *   SocketAddress        local address to bind
 *   SocketAddressLength  size of *SocketAddress in bytes
 *   lpErrno              receives the error code on failure (may be NULL)
 *
 * Packs the sockaddr into an AFD_BIND_DATA, picks the share type from the
 * socket's options, issues the ioctl and waits if it pends. On success the
 * socket state becomes SocketBound and IOSB.Information is stored as the TDI
 * address handle. Returns SOCKET_ERROR on failure.
 */
INT
WSPBind(SOCKET Handle,
        const struct sockaddr *SocketAddress,
        int SocketAddressLength,
        LPINT lpErrno)
{
    IO_STATUS_BLOCK         IOSB;
    PAFD_BIND_DATA          BindData;
    PSOCKET_INFORMATION     Socket = NULL;
    NTSTATUS                Status;
    SOCKADDR_INFO           SocketInfo;
    HANDLE                  SockEvent;
    INT                     BindDataLength;

    /* Map the handle to its socket record before allocating anything. */
    Socket = GetSocketStructure(Handle);
    if (!Socket)
    {
        if (lpErrno) *lpErrno = WSAENOTSOCK;
        return SOCKET_ERROR;
    }

    /* The address must at least contain the family field, otherwise the
     * length subtraction below would go negative. */
    if (!SocketAddress ||
        SocketAddressLength < (int)sizeof(SocketAddress->sa_family))
    {
        if (lpErrno) *lpErrno = WSAEFAULT;
        return SOCKET_ERROR;
    }

    /* The ioctl below is told the buffer is 0xA + SizeOfLocalAddress bytes,
     * so allocate at least that much (zero-filled) to avoid an over-read when
     * the caller's address is shorter. */
    BindDataLength = 0xA + SocketAddressLength;
    if (BindDataLength < 0xA + (INT)Socket->SharedData.SizeOfLocalAddress)
        BindDataLength = 0xA + (INT)Socket->SharedData.SizeOfLocalAddress;

    BindData = HeapAlloc(GlobalHeap, 0, BindDataLength);
    if (!BindData)
        return MsafdReturnWithErrno(STATUS_INSUFFICIENT_RESOURCES, lpErrno, 0, NULL);
    RtlZeroMemory(BindData, BindDataLength);

    Status = NtCreateEvent(&SockEvent, GENERIC_READ | GENERIC_WRITE, NULL, 1, FALSE);
    if (!NT_SUCCESS(Status))
    {
        HeapFree(GlobalHeap, 0, BindData);
        return MsafdReturnWithErrno(Status, lpErrno, 0, NULL);
    }

    /* Repack the sockaddr as a single-entry TDI transport address. */
    BindData->Address.TAAddressCount = 1;
    BindData->Address.Address[0].AddressLength =
        SocketAddressLength - sizeof(SocketAddress->sa_family);
    BindData->Address.Address[0].AddressType = SocketAddress->sa_family;
    RtlCopyMemory(BindData->Address.Address[0].Address,
                  SocketAddress->sa_data,
                  SocketAddressLength - sizeof(SocketAddress->sa_family));

    Socket->HelperData->WSHGetSockaddrType((PSOCKADDR)SocketAddress,
                                           SocketAddressLength, &SocketInfo);

    /* Choose the share type from the socket options. */
    if (Socket->SharedData.ExclusiveAddressUse)
        BindData->ShareType = AFD_SHARE_EXCLUSIVE;
    else if (SocketInfo.EndpointInfo == SockaddrEndpointInfoWildcard)
        BindData->ShareType = AFD_SHARE_WILDCARD;
    else if (Socket->SharedData.ReuseAddresses)
        BindData->ShareType = AFD_SHARE_REUSE;
    else
        BindData->ShareType = AFD_SHARE_UNIQUE;

    /* Send the bind request IRP to the AFD socket device. */
    Status = NtDeviceIoControlFile((HANDLE)Socket->Handle, SockEvent, NULL, NULL, &IOSB,
                                   IOCTL_AFD_BIND,
                                   BindData,
                                   0xA + Socket->SharedData.SizeOfLocalAddress,
                                   BindData,
                                   0xA + Socket->SharedData.SizeOfLocalAddress);

    if (Status == STATUS_PENDING)
    {
        WaitForSingleObject(SockEvent, INFINITE);
        Status = IOSB.Status;
    }

    NtClose( SockEvent );
    HeapFree(GlobalHeap, 0, BindData);

    if (Status != STATUS_SUCCESS)
        return MsafdReturnWithErrno ( Status, lpErrno, 0, NULL );

    Socket->SharedData.State = SocketBound; /* bind complete */
    Socket->TdiAddressHandle = (HANDLE)IOSB.Information;

    if (Socket->HelperEvents & WSH_NOTIFY_BIND)
    {
        /* The helper DLL asked to be told about binds. */
        Status = Socket->HelperData->WSHNotify(Socket->HelperContext, Socket->Handle,
                                               Socket->TdiAddressHandle,
                                               Socket->TdiConnectionHandle,
                                               WSH_NOTIFY_BIND);
        if (Status)
        {
            if (lpErrno) *lpErrno = Status;
            return SOCKET_ERROR;
        }
    }

    return MsafdReturnWithErrno ( Status, lpErrno, 0, NULL );
}

 

看看afd驱动是如何处理绑定请求的

/*
 * AfdBindSocket - AFD handler for the bind ioctl.
 *
 * Records the requested local address in the socket FCB, opens the matching
 * transport address file (WarmSocketForBind) and, for connectionless sockets,
 * immediately posts a datagram receive to the transport. The IRP is completed
 * through UnlockAndMaybeComplete; on success its Information field carries
 * the address file handle.
 */
NTSTATUS
AfdBindSocket(PDEVICE_OBJECT DeviceObject, PIRP Irp,PIO_STACK_LOCATION IrpSp)
{
    NTSTATUS       Status = STATUS_SUCCESS;
    PFILE_OBJECT   FileObject = IrpSp->FileObject; /* the socket file object */
    PAFD_FCB       FCB = FileObject->FsContext;    /* the socket FCB */
    PAFD_BIND_DATA BindReq;

    if( !SocketAcquireStateLock( FCB ) ) return LostSocket( Irp );

    if( !(BindReq = LockRequest( Irp, IrpSp )) )
        return UnlockAndMaybeComplete( FCB, STATUS_NO_MEMORY, Irp, 0 );

    /* Remember the socket's local address. */
    FCB->LocalAddress = TaCopyTransportAddress( &BindReq->Address );

    /* NOTE(review): presumably the copy returns NULL when it cannot allocate -
     * confirm. */
    if( !FCB->LocalAddress )
        return UnlockAndMaybeComplete( FCB, STATUS_NO_MEMORY, Irp, 0 );

    /* Author's note: the socket is initially "connected" to itself so that a
     * send loops back. */
    Status = TdiBuildConnectionInfo( &FCB->AddressFrom, FCB->LocalAddress );

    /* Key step: create and bind an address file in the transport driver. */
    if( NT_SUCCESS(Status) )
        Status = WarmSocketForBind( FCB );

    /* Stop here on failure. The original fell through and, for connectionless
     * sockets, posted a receive on an address file that was never opened and
     * overwrote the failure status. */
    if( !NT_SUCCESS(Status) )
        return UnlockAndMaybeComplete( FCB, Status, Irp, 0 );

    /* For a connectionless socket, post a receive request to the transport
     * right away (the article explains why later on). */
    if( FCB->Flags & AFD_ENDPOINT_CONNECTIONLESS )
    {
        Status = TdiReceiveDatagram
            ( &FCB->ReceiveIrp.InFlightRequest, FCB->AddressFile.Object, 0,
              FCB->Recv.Window, FCB->Recv.Size, /* Window is the datagram receive buffer */
              FCB->AddressFrom, &FCB->ReceiveIrp.Iosb, PacketSocketRecvComplete, FCB );

        /* A pending receive is the expected outcome, not an error. */
        if( Status == STATUS_PENDING ) Status = STATUS_SUCCESS;
    }

    if (NT_SUCCESS(Status))
        FCB->State = SOCKET_STATE_BOUND; /* bind complete */

    return UnlockAndMaybeComplete( FCB, Status, Irp, (ULONG_PTR)FCB->AddressFile.Handle );
}

 

如上,上面最关键的操作便是在下层的传输层驱动中创建一个地址对象,然后让afd驱动中的套接字与传输层驱动中的这个地址对象进行绑定。具体是由下面的函数完成的。

/*
 * WarmSocketForBind - open the transport address file for a socket FCB.
 *
 * Opens an address file on FCB->TdiDeviceName for FCB->LocalAddress and stores
 * the resulting handle and file object in FCB->AddressFile. For connectionless
 * sockets it also queries the transport's maximum datagram length and
 * allocates a receive window of that size.
 *
 * Returns the first failing status, STATUS_NO_MEMORY if the window cannot be
 * allocated, or the success status.
 */
NTSTATUS WarmSocketForBind( PAFD_FCB FCB ) /* the socket FCB */
{
    NTSTATUS Status;

    /* Create an address object in the transport layer to bind to. */
    Status = TdiOpenAddressFile(&FCB->TdiDeviceName,       /* target transport device (tcp/udp/RawIP) */
                                FCB->LocalAddress,         /* address to bind */
                                &FCB->AddressFile.Handle,  /* OUT: address file handle */
                                &FCB->AddressFile.Object); /* OUT: address file object */
    if (!NT_SUCCESS(Status))
        return Status;

    if (FCB->Flags & AFD_ENDPOINT_CONNECTIONLESS)
    {
        /* Ask the transport for its maximum datagram length; the receive
         * window must be able to hold one whole datagram. */
        Status = TdiQueryMaxDatagramLength(FCB->AddressFile.Object, &FCB->Recv.Size);
        if (NT_SUCCESS(Status))
        {
            FCB->Recv.Window = ExAllocatePool(PagedPool, FCB->Recv.Size);
            if (!FCB->Recv.Window)
                Status = STATUS_NO_MEMORY;
        }
    }

    return Status;
}

由于udp协议是面向报文的,是以报文为单位进行收发的,所以要接收完整的udp报文就必须分配足够大的接收缓冲区。

/*
 * TdiOpenAddressFile - open a transport address file for a given address.
 *
 * Builds a FILE_FULL_EA_INFORMATION whose name is TdiTransportAddress and
 * whose value is a copy of *Name, then opens the TDI device with it through
 * TdiOpenDevice.
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES if the EA buffer cannot be allocated,
 * otherwise the status from TdiOpenDevice.
 */
NTSTATUS TdiOpenAddressFile(
    PUNICODE_STRING DeviceName,    /* name of the transport's TDI device */
    PTRANSPORT_ADDRESS Name,       /* address to bind */
    PHANDLE AddressHandle,         /* OUT: address file handle */
    PFILE_OBJECT *AddressObject)   /* OUT: address file object */
{
    PFILE_FULL_EA_INFORMATION EaInfo;
    NTSTATUS Status;
    ULONG EaLength;
    PTRANSPORT_ADDRESS Address;

    EaLength = sizeof(FILE_FULL_EA_INFORMATION) + TDI_TRANSPORT_ADDRESS_LENGTH +
               TaLengthOfTransportAddress( Name ) + 1;

    EaInfo = (PFILE_FULL_EA_INFORMATION)ExAllocatePool(NonPagedPool, EaLength);
    if (!EaInfo)
        return STATUS_INSUFFICIENT_RESOURCES;

    /* Zero-filling also supplies the NUL that terminates the EA name. */
    RtlZeroMemory(EaInfo, EaLength);

    EaInfo->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH;
    RtlCopyMemory(EaInfo->EaName, TdiTransportAddress, TDI_TRANSPORT_ADDRESS_LENGTH);
    EaInfo->EaValueLength = sizeof(TA_IP_ADDRESS);

    /* The value follows the NUL-terminated name. */
    Address = (PTRANSPORT_ADDRESS)(EaInfo->EaName + TDI_TRANSPORT_ADDRESS_LENGTH + 1);
    TaCopyTransportAddressInPlace( Address, Name );

    /* Key step: open the transport's TDI device, which creates the address
     * file object; the caller stores it in the socket FCB. */
    Status = TdiOpenDevice(DeviceName, EaLength, EaInfo, AddressHandle, AddressObject);

    ExFreePool(EaInfo);

    return Status;
}

 

/*
 * TdiOpenDevice - open a TDI device with the given extended attributes.
 *
 * On success *Handle is a kernel handle to the new file and *Object is a
 * referenced pointer to its file object. On failure no handle is left open
 * and *Handle is set to INVALID_HANDLE_VALUE.
 */
NTSTATUS TdiOpenDevice(
    PUNICODE_STRING DeviceName,
    ULONG EaLength,
    PFILE_FULL_EA_INFORMATION EaInfo,
    PHANDLE Handle,          /* OUT: file handle */
    PFILE_OBJECT *Object)    /* OUT: file object */
{
    OBJECT_ATTRIBUTES Attr;
    IO_STATUS_BLOCK Iosb;
    NTSTATUS Status;

    InitializeObjectAttributes(&Attr, DeviceName, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE,
                               NULL, NULL);

    /* Key step: opening the transport device creates the file object
     * (an address file when called from TdiOpenAddressFile). */
    Status = ZwCreateFile(Handle, GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE,
                          &Attr, &Iosb, 0, FILE_ATTRIBUTE_NORMAL, 0, FILE_OPEN_IF, 0,
                          EaInfo, EaLength);
    if (!NT_SUCCESS(Status))
    {
        *Handle = INVALID_HANDLE_VALUE;
        return Status;
    }

    Status = ObReferenceObjectByHandle(*Handle, GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE,
                                       IoFileObjectType, KernelMode,
                                       (PVOID*)Object, NULL);
    if (!NT_SUCCESS(Status))
    {
        /* Do not leak the handle when the object cannot be referenced. */
        ZwClose(*Handle);
        *Handle = INVALID_HANDLE_VALUE;
    }

    return Status;
}

 

ZwCreateFile将给传输层的设备发送一个IRP_MJ_CREATE,程序流从afd.sys驱动进入tcpip.sys传输层驱动中的irp派遣函数中。看看下面传输层是如何处理这种irp请求的(创建、绑定地址对象请求)

/*
 * TiCreateFileObject - transport-layer (tcpip) handler for IRP_MJ_CREATE.
 *
 * Allocates a TRANSPORT_CONTEXT, stores it in the file object's FsContext and
 * then, depending on the EA name, opens either an address file
 * (TdiTransportAddress) or a connection endpoint (TdiConnectionContext).
 * FsContext2 records which kind of object was created.
 *
 * This listing is abridged: local declarations and the final `else` branch
 * are omitted.
 */
NTSTATUS TiCreateFileObject(PDEVICE_OBJECT DeviceObject,PIRP Irp)
{

  EaInfo = Irp->AssociatedIrp.SystemBuffer;

  Context = ExAllocatePoolWithTag(NonPagedPool, sizeof(TRANSPORT_CONTEXT), TRANS_CONTEXT_TAG);
  if (!Context)
    return STATUS_INSUFFICIENT_RESOURCES;

  Context->CancelIrps = FALSE;

  IrpSp = IoGetCurrentIrpStackLocation(Irp);
  IrpSp->FileObject->FsContext = Context;
  Request.RequestContext       = Irp;

  /* Case 1: AFD asks to create and bind an address file object. */
  if (EaInfo && (EaInfo->EaNameLength == TDI_TRANSPORT_ADDRESS_LENGTH) &&
      (RtlCompareMemory(&EaInfo->EaName, TdiTransportAddress,
                        TDI_TRANSPORT_ADDRESS_LENGTH) == TDI_TRANSPORT_ADDRESS_LENGTH))
  {
    /* The address follows the NUL-terminated EA name. */
    Address = (PTA_IP_ADDRESS)(EaInfo->EaName + EaInfo->EaNameLength + 1);

    /* The device that was opened decides the protocol. */
    if (DeviceObject == TCPDeviceObject)
      Protocol = IPPROTO_TCP;
    else if (DeviceObject == UDPDeviceObject)
      Protocol = IPPROTO_UDP;
    else if (DeviceObject == IPDeviceObject)
      Protocol = IPPROTO_RAW;
    else if (DeviceObject == RawIPDeviceObject)
    {
      /* For raw IP the protocol number comes from the file name; fail the
       * create the same way as the other invalid cases if it cannot be read. */
      Status = TiGetProtocolNumber(&IrpSp->FileObject->FileName, &Protocol);
      if (!NT_SUCCESS(Status))
      {
        ExFreePoolWithTag(Context, TRANS_CONTEXT_TAG);
        return STATUS_INVALID_PARAMETER;
      }
    }
    else
    {
      ExFreePoolWithTag(Context, TRANS_CONTEXT_TAG);
      return STATUS_INVALID_PARAMETER;
    }

    /* Key step: create the transport address object; it is returned in Request. */
    Status = FileOpenAddress(&Request, Address, Protocol, NULL);
    if (NT_SUCCESS(Status))
    {
      /* FsContext2 marks Context->Handle as a transport address. */
      IrpSp->FileObject->FsContext2 = (PVOID)TDI_TRANSPORT_ADDRESS_FILE;
      Context->Handle.AddressHandle = Request.Handle.AddressHandle;
    }
  }
  /* Case 2: AFD asks to create a connection endpoint. */
  else if (EaInfo && (EaInfo->EaNameLength == TDI_CONNECTION_CONTEXT_LENGTH) &&
           (RtlCompareMemory(&EaInfo->EaName, TdiConnectionContext,
                             TDI_CONNECTION_CONTEXT_LENGTH) == TDI_CONNECTION_CONTEXT_LENGTH))
  {
    /* Only the TCP device accepts connection endpoints. */
    if (DeviceObject != TCPDeviceObject)
    {
      ExFreePoolWithTag(Context, TRANS_CONTEXT_TAG);
      return STATUS_INVALID_PARAMETER;
    }

    ClientContext = *((PVOID*)(EaInfo->EaName + EaInfo->EaNameLength));

    Status = FileOpenConnection(&Request, ClientContext);
    if (NT_SUCCESS(Status))
    {
      IrpSp->FileObject->FsContext2 = (PVOID)TDI_CONNECTION_FILE;
      Context->Handle.ConnectionContext = Request.Handle.ConnectionContext;
    }
  }
  else …

  Irp->IoStatus.Status = Status;

  return Status;
}

 

/*
 * FileOpenAddress - create a transport-layer address object.
 *
 *   Request   OUT: Request->Handle.AddressHandle receives the new object
 *   Address   IP address and port to bind
 *   Protocol  IPPROTO_TCP / IPPROTO_UDP / IPPROTO_ICMP; anything else is
 *             treated as raw IP
 *   Options   not used in this listing
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES if the object cannot be allocated,
 * STATUS_INVALID_ADDRESS if the address is specified but matches no
 * interface, STATUS_ADDRESS_ALREADY_EXISTS if the port cannot be allocated,
 * and STATUS_SUCCESS otherwise.
 */
NTSTATUS FileOpenAddress(
  PTDI_REQUEST Request,     /* OUT */
  PTA_IP_ADDRESS Address,   /* IP address : port */
  USHORT Protocol,          /* TCP / UDP / raw IP */
  PVOID Options)
{
  PADDRESS_FILE AddrFile;

  /* Key step: one address object is created for, and bound to, each AFD socket. */
  AddrFile = ExAllocatePoolWithTag(NonPagedPool, sizeof(ADDRESS_FILE), ADDR_FILE_TAG);
  if (!AddrFile)
    return STATUS_INSUFFICIENT_RESOURCES;

  RtlZeroMemory(AddrFile, sizeof(ADDRESS_FILE));

  AddrFile->RefCount = 1;
  AddrFile->Free = AddrFileFree;
  AddrFile->TTL = 128;
  AddrFile->DF = 0;
  AddrFile->BCast = 1;
  AddrFile->HeaderIncl = 1;
  AddrFile->Family = Address->Address[0].AddressType;
  AddrFile->Address.Address.IPv4Address = Address->Address[0].Address[0].in_addr;
  AddrFile->Address.Type = IP_ADDRESS_V4;

  /* Reject an address that is specified but belongs to none of the interfaces. */
  if (!AddrIsUnspecified(&AddrFile->Address) &&
      !AddrLocateInterface(&AddrFile->Address))
  {
    ExFreePoolWithTag(AddrFile, ADDR_FILE_TAG);
    return STATUS_INVALID_ADDRESS;
  }

  switch (Protocol)
  {
  case IPPROTO_TCP:
    /* Check for a port conflict, or pick a free TCP port. */
    AddrFile->Port = TCPAllocatePort(Address->Address[0].Address[0].sin_port);

    if ((Address->Address[0].Address[0].sin_port &&
         AddrFile->Port != Address->Address[0].Address[0].sin_port) ||
        AddrFile->Port == 0xffff)
    {
      ExFreePoolWithTag(AddrFile, ADDR_FILE_TAG);
      return STATUS_ADDRESS_ALREADY_EXISTS;
    }

    AddEntity(CO_TL_ENTITY, AddrFile, CO_TL_TCP);
    AddrFile->Send = NULL; /* TCPSendData */
    break;

  case IPPROTO_UDP:
    /* Check for a port conflict, or pick a free UDP port. */
    AddrFile->Port = UDPAllocatePort(Address->Address[0].Address[0].sin_port);

    if ((Address->Address[0].Address[0].sin_port &&
         AddrFile->Port != Address->Address[0].Address[0].sin_port) ||
        AddrFile->Port == 0xffff)
    {
      ExFreePoolWithTag(AddrFile, ADDR_FILE_TAG);
      return STATUS_ADDRESS_ALREADY_EXISTS;
    }

    AddEntity(CL_TL_ENTITY, AddrFile, CL_TL_UDP);
    AddrFile->Send = UDPSendDatagram;
    break;

  case IPPROTO_ICMP:
    AddrFile->Port = 0;
    AddrFile->Send = ICMPSendDatagram;
    AddEntity(ER_ENTITY, AddrFile, ER_ICMP);
    break;

  default:
    /* Use raw IP for all other protocols */
    AddrFile->Port = 0;
    AddrFile->Send = RawIPSendDatagram;
    AddEntity(CL_TL_ENTITY, AddrFile, 0);
    break;
  }

  AddrFile->Protocol = Protocol;

  InitializeListHead(&AddrFile->ReceiveQueue);  /* per-address queue of receive requests */
  InitializeListHead(&AddrFile->TransmitQueue); /* per-address queue of send requests */
  KeInitializeSpinLock(&AddrFile->Lock);

  /* Hand the new address object back to the caller. */
  Request->Handle.AddressHandle = AddrFile;

  /* Key step: add the object to tcpip's list of address objects. Author's
   * note: when a packet arrives, tcpip uses the protocol, destination address
   * and destination port from the headers to find the matching address object
   * and passes the packet up to the AFD sockets bound to it. */
  ExInterlockedInsertTailList(&AddressFileListHead, &AddrFile->ListEntry, &AddressFileListLock);

  return STATUS_SUCCESS;
}

 

看看UDP是如何检测、分配端口号的

/*
 * UDPAllocatePort - reserve a UDP port.
 *
 * HintPort == 0: pick a free port from the dynamic range
 *                [UDP_STARTING_PORT, UDP_STARTING_PORT + UDP_DYNAMIC_PORTS).
 * HintPort != 0: try to reserve exactly that port; returns HintPort if
 *                AllocatePort accepts it, (UINT)-1 otherwise.
 */
UINT UDPAllocatePort( UINT HintPort )
{
    /* No port requested: let the allocator choose one. */
    if( HintPort == 0 )
        return AllocatePortFromRange( &UDPPorts, UDP_STARTING_PORT,
                                      UDP_STARTING_PORT + UDP_DYNAMIC_PORTS );

    /* A specific port was requested: it is either granted or refused. */
    return AllocatePort( &UDPPorts, HintPort ) ? HintPort : (UINT)-1;
}

 

看了这么多,总结一下套接字的绑定过程实际上是将Afd中创建的套接字文件对象 绑定到 tcpip传输层驱动中创建的地址对象。实际上,我们可以简单理解为:afd.套接字   绑定    传输层中的地址

其本质是通过afd给传输层设备发送一个“创绑地址文件对象请求”实现的。

 

 

一个udp套接字在绑定了传输层地址后,就可以开始收发数据报了,不用事先建立连接。我们看看udp报文的发送过程

/*
 * sendto - send one buffer to a given destination.
 *
 *   s          socket handle
 *   buf, len   data to send
 *   flags      send flags, passed through
 *   to, tolen  destination address (ip:port) and its size
 *
 * Wraps the buffer in a single WSABUF and forwards to WSASendTo in
 * synchronous mode. Returns the number of bytes sent, or -1 (SOCKET_ERROR)
 * if WSASendTo reports an error.
 */
INT
sendto(IN  SOCKET s,
       IN  CONST CHAR FAR* buf,
       IN  INT len,
       IN  INT flags,
       IN  CONST struct sockaddr *to,
       IN  INT tolen)
{
    DWORD  Error;
    DWORD  BytesSent;
    WSABUF WSABuf;

    WSABuf.len = len;
    /* WSABUF.buf is not const-qualified, so the cast is required. */
    WSABuf.buf = (CHAR FAR*)buf;

    Error = WSASendTo(s, &WSABuf, 1, &BytesSent, flags, to, tolen, NULL, NULL);

    if( Error )
        return -1; /* i.e. SOCKET_ERROR */
    else
        return BytesSent;
}

 

/*
 * WSASendTo - send data to a specific destination through the socket's
 * service provider.
 *
 * Looks up the provider that owns the socket and forwards every argument to
 * its lpWSPSendTo entry (WSPSendTo for tcpip). Returns the provider's result.
 * The thread's last error is set to the provider's error code on
 * SOCKET_ERROR and cleared to 0 otherwise.
 */
INT
WSASendTo(IN  SOCKET s,
          IN  LPWSABUF lpBuffers,
          IN  DWORD dwBufferCount,
          OUT LPDWORD lpNumberOfBytesSent,
          IN  DWORD dwFlags,
          IN  CONST struct sockaddr *lpTo,
          IN  INT iToLen,
          IN  LPWSAOVERLAPPED lpOverlapped,
          IN  LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine)
{
    PCATALOG_ENTRY Provider = NULL;
    INT            Errno;
    INT            Code;

    /* Find the provider that owns this socket. Provider starts as NULL so an
     * unknown handle is detected instead of dereferencing an uninitialised
     * pointer.
     * NOTE(review): relies on ReferenceProviderByHandle leaving *Provider
     * untouched on failure - confirm. */
    ReferenceProviderByHandle((HANDLE)s, &Provider);
    if (!Provider)
    {
        WSASetLastError(WSAENOTSOCK);
        return SOCKET_ERROR;
    }

    /* For tcpip, lpWSPSendTo is WSPSendTo. */
    Code = Provider->ProcTable.lpWSPSendTo(s, lpBuffers, dwBufferCount, lpNumberOfBytesSent,
                                           dwFlags, (CONST LPSOCKADDR)lpTo, iToLen,
                                           lpOverlapped, lpCompletionRoutine, NULL, &Errno);

    DereferenceProviderByPointer(Provider);

    if (Code == SOCKET_ERROR)
        WSASetLastError(Errno);
    else
        WSASetLastError(0);

    return Code;
}

 

 

int

WSPSendTo(SOCKET Handle,

          LPWSABUF
lpBuffers,//缓冲数组

          DWORD
dwBufferCount,//缓冲个数

          LPDWORD
lpNumberOfBytesSent,//OUT

          DWORD
iFlags,

          const
struct sockaddr
*SocketAddress,//目标ip:port

          int
SocketAddressLength,

          LPWSAOVERLAPPED
lpOverlapped,//NULL就表示同步模式

          LPWSAOVERLAPPED_COMPLETION_ROUTINE
lpCompletionRoutine,//APC方式

          LPWSATHREADID
lpThreadId,

          LPINT
lpErrno)

{

    HANDLE                  Event
= NULL;

    PSOCKADDR               BindAddress
= NULL;

    Socket
= GetSocketStructure(Handle);

    if
(Socket->SharedData.State == SocketOpen)//if
尚未绑定,就绑定在通派地址上(即全0地址)

    {

        BindAddressLength
= Socket->HelperData->MaxWSAddressLength;

        BindAddress
= HeapAlloc(GlobalHeap,
0, BindAddressLength);

        //即全0地址

        Socket->HelperData->WSHGetWildcardSockaddr(Socket->HelperContext,

                                                  
BindAddress,&BindAddressLength);

        if
(WSPBind(Handle,
BindAddress, BindAddressLength,
lpErrno) == SOCKET_ERROR)

            return
SOCKET_ERROR;

    }

    RemoteAddress
= HeapAlloc(GlobalHeap,
0, 0x6 + SocketAddressLength);

    Status
= NtCreateEvent(&SockEvent,GENERIC_READ | GENERIC_WRITE,NULL, 1, FALSE);

    //将目标地址封装成TDI格式

    RemoteAddress->TAAddressCount = 1;

    RemoteAddress->Address[0].AddressLength
= SocketAddressLength – sizeof(SocketAddress->sa_family);

    RtlCopyMemory(&RemoteAddress->Address[0].AddressType, SocketAddress,
SocketAddressLength);

    SendInfo.BufferArray = (PAFD_WSABUF)lpBuffers;

    SendInfo.AfdFlags = Socket->SharedData.NonBlocking
? AFD_IMMEDIATE : 0;

    SendInfo.BufferCount = dwBufferCount;

    SendInfo.TdiConnection.RemoteAddress
= RemoteAddress;

    SendInfo.TdiConnection.RemoteAddressLength
= Socket->HelperData->MaxTDIAddressLength;

    if
(lpOverlapped == NULL)//if
NULL,就表示同步模式发送数据包

    {

        APCContext
= NULL;

        APCFunction
= NULL;

        Event
= SockEvent;//使用内部事件

        IOSB
= &DummyIOSB;

    }

    else

    {

        if
(lpCompletionRoutine == NULL)//重叠模式发送数据包

        {

            APCContext
= lpOverlapped;

            APCFunction
= NULL;

            Event
= lpOverlapped->hEvent;//看到没,使用重叠结构中用户传入的事件对象

        }

        else

        {

            APCFunction
= NULL;

            APCContext
= lpCompletionRoutine;//这个完成例程实际上是一个APC

            SendInfo.AfdFlags |= AFD_SKIP_FIO;

        }

        IOSB
= (PIO_STATUS_BLOCK)&lpOverlapped->Internal;//采用这个IO状态块

        SendInfo.AfdFlags |= AFD_OVERLAPPED;//标志含有重叠结构,使用异步方式

    }

 

    //关键。向afd中的套接字设备发送一个irp,请求发送udp报文

    Status
= NtDeviceIoControlFile((HANDLE)Handle,//套接字句柄

                                   Event,APCFunction,APCContext,IOSB,

                                   IOCTL_AFD_SEND_DATAGRAM,//控制码

                                  
&SendInfo,sizeof(SendInfo),NULL,0);

//如果用户要求同步方式发送,就一直等待完成

    if
(Status == STATUS_PENDING
&& lpOverlapped == NULL)

    {

        WaitForSingleObject(SockEvent, INFINITE);

        Status
= IOSB->Status;

    }

    if
(Status != STATUS_PENDING)

       SockReenableAsyncSelectEvent(Socket, FD_WRITE);

    return
MsafdReturnWithErrno(Status,
lpErrno, IOSB->Information, lpNumberOfBytesSent);

}

 

看看afd驱动是如何处理应用层发下来的“udp报文发送请求”这种irp的

/*
 * AfdPacketSocketWriteData - AFD handler for the datagram-send ioctl.
 *
 * Requires the socket to be in SOCKET_STATE_BOUND. Locks the request and its
 * buffer array, builds a TDI connection-information block for the
 * destination, and sends the first buffer to the transport through
 * TdiSendDatagram on the socket's bound address file. The IRP is completed
 * here; on success Information is the length of the first buffer.
 */
NTSTATUS
AfdPacketSocketWriteData(PDEVICE_OBJECT DeviceObject, PIRP Irp,PIO_STACK_LOCATION IrpSp)
{
    NTSTATUS                    Status = STATUS_SUCCESS;
    PTDI_CONNECTION_INFORMATION TargetAddress;
    PFILE_OBJECT                FileObject = IrpSp->FileObject;
    PAFD_FCB                    FCB = FileObject->FsContext;
    PAFD_SEND_INFO_UDP          SendReq;
    ULONG                       Information;

    if( !SocketAcquireStateLock( FCB ) ) return LostSocket( Irp );

    /* Only a bound socket can send. */
    if( FCB->State != SOCKET_STATE_BOUND )
        return UnlockAndMaybeComplete( FCB, STATUS_INVALID_PARAMETER, Irp, 0 );

    if( !(SendReq = LockRequest( Irp, IrpSp )) )
        return UnlockAndMaybeComplete( FCB, STATUS_NO_MEMORY, Irp, 0 );

    SendReq->BufferArray = LockBuffers( SendReq->BufferArray, SendReq->BufferCount,
                                        NULL, NULL, FALSE, FALSE );
    if( !SendReq->BufferArray )
        return UnlockAndMaybeComplete( FCB, STATUS_ACCESS_VIOLATION, Irp, 0 );

    Status = TdiBuildConnectionInfo( &TargetAddress, SendReq->TdiConnection.RemoteAddress );

    if( NT_SUCCESS(Status) )
    {
        FCB->PollState &= ~AFD_EVENT_SEND;

        /* Key step: build a TDI IRP and send it to the transport device;
         * Status is the transport's result. */
        Status = TdiSendDatagram
            ( &FCB->SendIrp.InFlightRequest,
              FCB->AddressFile.Object,        /* bound address file */
              SendReq->BufferArray[0].buf, SendReq->BufferArray[0].len,
              TargetAddress,                  /* destination ip:port */
              &FCB->SendIrp.Iosb, PacketSocketSendComplete, FCB );

        ExFreePool( TargetAddress );
    }

    /* Author's note: a still-pending transport send is reported as success. */
    if( Status == STATUS_PENDING )
        Status = STATUS_SUCCESS;

    /* Author's note: the socket IRP is completed right here, so this handler
     * never returns STATUS_PENDING to its caller and is effectively
     * synchronous. */

    /* Report the buffer length only for a successful send. The original
     * reported it on failure too, unlike the other error paths above. */
    Information = NT_SUCCESS(Status) ? SendReq->BufferArray[0].len : 0;

    UnlockBuffers( SendReq->BufferArray, SendReq->BufferCount, FALSE );

    return UnlockAndMaybeComplete( FCB, Status, Irp, Information );
}

 

如上,afd驱动处理udp报文发送请求irp时,仅仅是将该irp转换成tdi irp,发给下层的传输层驱动而已。

/*
 * TdiSendDatagram - build a TDI_SEND_DATAGRAM IRP and send it to the transport.
 *
 *   Irp                OUT: the IRP that was built (NULL on early failure)
 *   TransportObject    the transport address file object
 *   Buffer, BufferLength  data to send
 *   Addr               destination address
 *   Iosb               status block for the request
 *   CompletionRoutine, CompletionContext  completion callback and its context
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES if the IRP or MDL cannot be allocated
 * or the buffer cannot be locked; otherwise the status from TdiCall.
 */
NTSTATUS TdiSendDatagram(
    PIRP *Irp,                                 /* OUT */
    PFILE_OBJECT TransportObject,
    PCHAR Buffer,
    UINT BufferLength,
    PTDI_CONNECTION_INFORMATION Addr,
    PIO_STATUS_BLOCK Iosb,
    PIO_COMPLETION_ROUTINE CompletionRoutine,
    PVOID CompletionContext)
{
    PDEVICE_OBJECT DeviceObject;
    NTSTATUS Status;
    PMDL Mdl;

    /* The device behind the address file (TCP, UDP or raw IP). */
    DeviceObject = IoGetRelatedDeviceObject(TransportObject);

    *Irp = TdiBuildInternalDeviceControlIrp
        ( TDI_SEND_DATAGRAM, DeviceObject, TransportObject, NULL, Iosb );
    if (!*Irp)
        return STATUS_INSUFFICIENT_RESOURCES;

    Mdl = IoAllocateMdl(Buffer, BufferLength, FALSE, FALSE, NULL);
    if (!Mdl)
    {
        /* Same IRP disposal as the lock-failure path below. */
        IoCompleteRequest(*Irp, IO_NO_INCREMENT);
        *Irp = NULL;
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    _SEH2_TRY {
        /* Lock the buffer's pages in memory. */
        MmProbeAndLockPages(Mdl, (*Irp)->RequestorMode, IoModifyAccess);
    } _SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER) {
        IoFreeMdl(Mdl);
        IoCompleteRequest(*Irp, IO_NO_INCREMENT);
        *Irp = NULL;
        _SEH2_YIELD(return STATUS_INSUFFICIENT_RESOURCES);
    } _SEH2_END;

    /* Fill in the send-datagram request. */
    TdiBuildSendDatagram(*Irp, DeviceObject, TransportObject,
                         CompletionRoutine, CompletionContext, Mdl, BufferLength, Addr);

    /* Send the TDI IRP down to the transport device. */
    Status = TdiCall(*Irp, DeviceObject, NULL, Iosb);

    return Status;
}

 

如上,afd层会将发下来的socket irp 转换成tdi irp 发给下层的传输层驱动。

/*
 * TdiCall - send an IRP to a driver, optionally waiting for it to finish.
 *
 * When the driver returns STATUS_PENDING and Event is non-NULL, waits on
 * Event and returns Iosb->Status. In every other case the driver's immediate
 * return value is passed back. TdiSendDatagram calls this with Event == NULL.
 */
NTSTATUS TdiCall(PIRP Irp,PDEVICE_OBJECT DeviceObject,PKEVENT Event,PIO_STATUS_BLOCK Iosb)
{
    NTSTATUS Result = IoCallDriver(DeviceObject, Irp);

    /* Finished already, or the caller does not want to wait. */
    if (Result != STATUS_PENDING || Event == NULL)
        return Result;

    KeWaitForSingleObject(Event, Executive, KernelMode, FALSE, NULL);

    return Iosb->Status; /* the transport's final result */
}

 

 

 

下面看看传输层处理这种irp的过程

/*
 * DispTdiSendDatagram - transport-layer handler for a TDI send-datagram IRP.
 *
 * Takes the address object from the file object's transport context, prepares
 * the IRP for cancellation, queries the data buffer from the IRP's MDL and
 * calls the address object's Send routine (UDPSendDatagram for UDP). If the
 * address object has no Send routine the request fails with
 * STATUS_UNSUCCESSFUL.
 *
 * A result other than STATUS_PENDING completes the IRP here; a pending result
 * marks the IRP pending instead.
 */
NTSTATUS DispTdiSendDatagram(PIRP Irp)
{
    PIO_STACK_LOCATION         StackLoc   = IoGetCurrentIrpStackLocation(Irp);
    PTDI_REQUEST_KERNEL_SENDDG SendParams = (PTDI_REQUEST_KERNEL_SENDDG)&(StackLoc->Parameters);
    PTRANSPORT_CONTEXT         Ctx        = StackLoc->FileObject->FsContext;
    TDI_REQUEST                Request;
    NTSTATUS                   Status;

    Request.Handle.AddressHandle = Ctx->Handle.AddressHandle; /* the address object */
    Request.RequestNotifyObject  = DispDataRequestComplete;
    Request.RequestContext       = Irp;

    Status = DispPrepareIrpForCancel(StackLoc->FileObject->FsContext,
                                     Irp,
                                     (PDRIVER_CANCEL)DispCancelRequest);

    if (NT_SUCCESS(Status))
    {
        PADDRESS_FILE AddrFile = (PADDRESS_FILE)Request.Handle.AddressHandle;
        PVOID DataBuffer;
        UINT  BufferSize;

        /* Get the address and length of the data described by the MDL. */
        NdisQueryBuffer( (PNDIS_BUFFER)Irp->MdlAddress, &DataBuffer, &BufferSize );

        if (AddrFile->Send == NULL)
        {
            Status = STATUS_UNSUCCESSFUL;
        }
        else
        {
            ULONG DataUsed = 0;

            /* For UDP, Send is UDPSendDatagram, which chooses the outgoing
             * route and hands the datagram to the IP layer. */
            Status = AddrFile->Send(AddrFile,                           /* source address object */
                                    SendParams->SendDatagramInformation, /* destination */
                                    DataBuffer, BufferSize, &DataUsed);
            Irp->IoStatus.Information = DataUsed;
        }
    }

    if (Status == STATUS_PENDING)
        IoMarkIrpPending(Irp);
    else
        DispDataRequestComplete(Irp, Status, Irp->IoStatus.Information);

    return Status;
}

 

下面是UDP协议处理udp报文发送请求irp的函数

/*
 * UDPSendDatagram - UDP's Send routine for an address object.
 *
 *   AddrFile    source address object
 *   ConnInfo    destination; only TDI_ADDRESS_TYPE_IP is accepted
 *   BufferData, DataSize  payload
 *   DataUsed    not written in this listing
 *
 * Picks the neighbor to send through, builds the UDP packet and passes it to
 * IPSendDatagram. Returns STATUS_UNSUCCESSFUL for a non-IP destination,
 * STATUS_NETWORK_UNREACHABLE if no route is found, the failing status from
 * BuildUDPPacket or IPSendDatagram, and STATUS_SUCCESS otherwise.
 *
 * This listing is abridged: the branch for an explicitly specified source
 * address is omitted.
 */
NTSTATUS UDPSendDatagram(
    PADDRESS_FILE AddrFile,               /* source address */
    PTDI_CONNECTION_INFORMATION ConnInfo, /* destination address */
    PCHAR BufferData,
    ULONG DataSize,
    PULONG DataUsed )
{
    IP_PACKET Packet;
    PTA_IP_ADDRESS RemoteAddressTa = (PTA_IP_ADDRESS)ConnInfo->RemoteAddress;
    IP_ADDRESS RemoteAddress;
    IP_ADDRESS LocalAddress;
    USHORT RemotePort;
    NTSTATUS Status;
    PNEIGHBOR_CACHE_ENTRY NCE;
    KIRQL OldIrql;

    LockObject(AddrFile, &OldIrql);

    switch( RemoteAddressTa->Address[0].AddressType ) {
    case TDI_ADDRESS_TYPE_IP:
        RemoteAddress.Type = IP_ADDRESS_V4;
        RemoteAddress.Address.IPv4Address = RemoteAddressTa->Address[0].Address[0].in_addr;
        RemotePort = RemoteAddressTa->Address[0].Address[0].sin_port;
        break;

    default:
        UnlockObject(AddrFile, OldIrql);
        return STATUS_UNSUCCESSFUL;
    }

    LocalAddress = AddrFile->Address;

    /* No source address given (the all-zero wildcard, the common case). */
    if (AddrIsUnspecified(&LocalAddress))
    {
        /* Use the destination and the routing table to choose the neighbor to
         * send to; choosing the neighbor also chooses the outgoing interface. */
        NCE = RouteGetRouteToDestination( &RemoteAddress );
        /* NOTE(review): presumably NULL means "no route" - confirm. */
        if (!NCE)
        {
            UnlockObject(AddrFile, OldIrql);
            return STATUS_NETWORK_UNREACHABLE;
        }

        LocalAddress = NCE->Interface->Unicast; /* that interface's unicast IP address */
    }
    else 。。。 /* source address given explicitly (rare); omitted in this listing */

    /* Build the UDP packet, i.e. prepend the UDP header. */
    Status = BuildUDPPacket( AddrFile, &Packet, &RemoteAddress, RemotePort,
                             &LocalAddress, AddrFile->Port, BufferData, DataSize );

    UnlockObject(AddrFile, OldIrql);

    /* Do not hand an unbuilt packet to the IP layer. */
    if (!NT_SUCCESS(Status))
        return Status;

    /* Pass the packet to IPSendDatagram for the chosen neighbor. Author's
     * note: it queues the packet on that neighbor's send queue, and the
     * miniport driver later writes it to the adapter's hardware buffer. */
    if (!NT_SUCCESS(Status = IPSendDatagram( &Packet, NCE, UDPSendPacketComplete, NULL )))
    {
        FreeNdisPacket(Packet.NdisPacket);
        return Status;
    }

    return STATUS_SUCCESS;
}

 

邻接点是什么意思呢?我们知道,目标机器可能很远很远,也可能与主机位于同一局域网中。主机发包时不可能直接发送给远程机器,中间经过的第一个路由器就是邻接点。而主机可以安装多块网卡,分属多个不同的局域网,通过多个路由器(网关)连向Internet。因此,发包时,必须算出我们的直接邻接点,首先将包(实际上是帧)发给它。当目标机器就是同局域网内的另外一台机器时,主机就可以直接发给他,不经由路由器,那么,目标机器就是邻接点。当目标机器很远很远不在同一局域网时,就必须选择一个路由器将报文转发出去,这个路由器此时就是我们的邻接点。总之,凡是主机直接相连(指可直接到达)的那些计算机、路由器都叫邻接点。

typedef struct NEIGHBOR_CACHE_ENTRY {  //邻接点描述符

    struct
NEIGHBOR_CACHE_ENTRY *Next;  //下一个

    UCHAR
State;                        //邻接点状态

    UINT
EventTimer;                    /*
Ticks since last event */

    UINT
EventCount;                    /*
Number of events */

    PIP_INTERFACE
Interface;            //关键。主机通往该邻接点的经由网卡

    UINT
LinkAddressLength;             //一般为6B

    PVOID
LinkAddress;                  //该邻接点的MAC地址

    IP_ADDRESS
Address;                 //该邻接点的IP地址

    LIST_ENTRY
PacketQueue;             //该邻接点的发包队列(相当于网卡的发包队列)

} NEIGHBOR_CACHE_ENTRY, *PNEIGHBOR_CACHE_ENTRY;

上面的结构就是ARP协议的基础,它缓存记录了局域网内每个邻接点的IP地址与MAC地址的映射关系。

 

 

 

理解了udp报文的发送过程,再看一下udp报文的接收过程。RecvFrom这个API最终进入下面的函数

int

WSPRecvFrom(SOCKET Handle,

            LPWSABUF
lpBuffers,

            DWORD
dwBufferCount,

            LPDWORD
lpNumberOfBytesRead,

            LPDWORD
ReceiveFlags,

            struct
sockaddr *SocketAddress,

            int
*SocketAddressLength,

            LPWSAOVERLAPPED
lpOverlapped,

            LPWSAOVERLAPPED_COMPLETION_ROUTINE
lpCompletionRoutine,

            LPWSATHREADID
lpThreadId,

            LPINT
lpErrno )

{

    HANDLE                      Event
= NULL;

    Socket
= GetSocketStructure(Handle);

    Status
= NtCreateEvent( &SockEvent,
GENERIC_READ | GENERIC_WRITE,NULL, 1, FALSE );

    RecvInfo.BufferArray = (PAFD_WSABUF)lpBuffers;

    RecvInfo.BufferCount = dwBufferCount;

    RecvInfo.TdiFlags = 0;

    RecvInfo.AfdFlags = Socket->SharedData.NonBlocking
? AFD_IMMEDIATE : 0;

    RecvInfo.AddressLength = SocketAddressLength;

    RecvInfo.Address = SocketAddress;

    if
(*ReceiveFlags == 0)

        RecvInfo.TdiFlags |= TDI_RECEIVE_NORMAL;

    else

    {

        if
(*ReceiveFlags & MSG_OOB)

            RecvInfo.TdiFlags |= TDI_RECEIVE_EXPEDITED;

        if
(*ReceiveFlags & MSG_PEEK)

            RecvInfo.TdiFlags |= TDI_RECEIVE_PEEK;

        if
(*ReceiveFlags & MSG_PARTIAL)//是否允许截断接收,用于UDP报文的接收标志

            RecvInfo.TdiFlags |= TDI_RECEIVE_PARTIAL;

    }

    if
(lpOverlapped == NULL)

    {

        APCContext
= NULL;

        APCFunction
= NULL;

        Event
= SockEvent;

        IOSB
= &DummyIOSB;

    }

    else

    {

        if
(lpCompletionRoutine == NULL)

        {

            APCContext
= lpOverlapped;

            APCFunction
= NULL;

            Event
= lpOverlapped->hEvent;

        }

        else

        {

            APCFunction
= NULL;

            APCContext
= lpCompletionRoutine;

            RecvInfo.AfdFlags |= AFD_SKIP_FIO;

        }

        IOSB
= (PIO_STATUS_BLOCK)&lpOverlapped->Internal;

        RecvInfo.AfdFlags |= AFD_OVERLAPPED;

    }

 

    IOSB->Status = STATUS_PENDING;

    //向套接字设备发送‘接收请求’这种socket irp

    Status
= NtDeviceIoControlFile((HANDLE)Handle,Event,APCFunction,APCContext,IOSB,

                                    IOCTL_AFD_RECV_DATAGRAM,//接收UDP数据报

                                   
&RecvInfo,sizeof(RecvInfo),NULL,0);

    if
(Status == STATUS_PENDING
&& lpOverlapped == NULL)

    {

        WaitForSingleObject(SockEvent, INFINITE);

        Status
= IOSB->Status;

    }

    NtClose(
SockEvent );

    *ReceiveFlags
= 0;

    switch
(Status)

    {

        case
STATUS_RECEIVE_EXPEDITED: *ReceiveFlags = MSG_OOB;

            break;

        case
STATUS_RECEIVE_PARTIAL_EXPEDITED:

            *ReceiveFlags = MSG_PARTIAL | MSG_OOB;

            break;

        case
STATUS_RECEIVE_PARTIAL:

            *ReceiveFlags = MSG_PARTIAL;

            break;

    }

    /*
Re-enable Async Event */

    SockReenableAsyncSelectEvent(Socket, FD_READ);

    return
MsafdReturnWithErrno ( Status, lpErrno, IOSB->Information,
lpNumberOfBytesRead );

}

 

看看afd驱动是如何处理‘接收请求’这种irp的:

/*
 * AfdPacketSocketReadData - AFD handler for a datagram receive request IRP.
 *
 * If the socket's datagram list already holds a datagram, the request is
 * answered immediately (or failed with STATUS_BUFFER_TOO_SMALL when the
 * datagram does not fit in the first buffer and truncation is not allowed).
 * If the list is empty, the request either fails with STATUS_CANT_WAIT
 * (AFD_IMMEDIATE set) or is queued on the socket via LeaveIrpUntilLater.
 *
 * Returns the status produced by UnlockAndMaybeComplete, LostSocket or
 * LeaveIrpUntilLater.
 */
NTSTATUS
AfdPacketSocketReadData(PDEVICE_OBJECT DeviceObject, PIRP Irp, PIO_STACK_LOCATION IrpSp)
{
    NTSTATUS Status = STATUS_SUCCESS;
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    PAFD_FCB FCB = FileObject->FsContext;
    PAFD_RECV_INFO_UDP RecvReq;
    PLIST_ENTRY ListEntry;
    PAFD_STORED_DATAGRAM DatagramRecv;

    if (!SocketAcquireStateLock(FCB))
        return LostSocket(Irp);

    /* Datagrams can only be read from a bound socket. */
    if (FCB->State != SOCKET_STATE_BOUND)
        return UnlockAndMaybeComplete(FCB, STATUS_INVALID_PARAMETER, Irp, 0);

    if (!(RecvReq = LockRequest(Irp, IrpSp)))
        return UnlockAndMaybeComplete(FCB, STATUS_NO_MEMORY, Irp, 0);

    RecvReq->BufferArray = LockBuffers(RecvReq->BufferArray, RecvReq->BufferCount,
                                       RecvReq->Address, RecvReq->AddressLength,
                                       TRUE, TRUE);
    /* BufferArray[0] is dereferenced below; bail out if locking failed. */
    if (!RecvReq->BufferArray)
        return UnlockAndMaybeComplete(FCB, STATUS_ACCESS_VIOLATION, Irp, 0);

    /* Best case: a datagram is already waiting in the socket's queue. */
    if (!IsListEmpty(&FCB->DatagramList)) {
        ListEntry = RemoveHeadList(&FCB->DatagramList);
        DatagramRecv = CONTAINING_RECORD(ListEntry, AFD_STORED_DATAGRAM, ListEntry);

        if (DatagramRecv->Len > RecvReq->BufferArray[0].len &&
            !(RecvReq->TdiFlags & TDI_RECEIVE_PARTIAL)) {
            /* Truncation is not allowed: put the datagram back and report its size. */
            InsertHeadList(&FCB->DatagramList, &DatagramRecv->ListEntry);
            Status = Irp->IoStatus.Status = STATUS_BUFFER_TOO_SMALL;
            Irp->IoStatus.Information = DatagramRecv->Len;
        } else {
            /* The buffer is large enough, or the caller accepts truncation. */
            Status = SatisfyPacketRecvRequest(FCB, Irp, DatagramRecv,
                                              (PUINT)&Irp->IoStatus.Information);
        }

        /* Keep the "readable" poll state in step with the queue contents. */
        if (!IsListEmpty(&FCB->DatagramList)) {
            FCB->PollState |= AFD_EVENT_RECEIVE;
            FCB->PollStatus[FD_READ_BIT] = STATUS_SUCCESS;
            PollReeval(FCB->DeviceExt, FCB->FileObject);
        } else {
            FCB->PollState &= ~AFD_EVENT_RECEIVE;
        }

        UnlockBuffers(RecvReq->BufferArray, RecvReq->BufferCount, TRUE);
        return UnlockAndMaybeComplete(FCB, Status, Irp, Irp->IoStatus.Information);
    }

    /* No datagram is queued at the moment. */
    FCB->PollState &= ~AFD_EVENT_RECEIVE;

    if (RecvReq->AfdFlags & AFD_IMMEDIATE) {
        /* The caller asked not to wait: fail the request right away. */
        Status = STATUS_CANT_WAIT;
        UnlockBuffers(RecvReq->BufferArray, RecvReq->BufferCount, TRUE);
        return UnlockAndMaybeComplete(FCB, Status, Irp, 0);
    }

    /* Park the IRP on the socket's pending receive queue. */
    return LeaveIrpUntilLater(FCB, Irp, FUNCTION_RECV);
}

//如果当时不能满足irp,就挂入队列

/*
 * LeaveIrpUntilLater - queue an IRP that cannot be satisfied right now.
 *
 * Appends Irp to FCB->PendingIrpList[Function], marks it pending, installs
 * AfdCancelHandler as its cancel routine and releases the socket state lock.
 * Always returns STATUS_PENDING.
 */
NTSTATUS LeaveIrpUntilLater( PAFD_FCB FCB, PIRP Irp, UINT Function )
{
    PLIST_ENTRY PendingQueue = &FCB->PendingIrpList[Function];

    InsertTailList(PendingQueue, &Irp->Tail.Overlay.ListEntry);
    IoMarkIrpPending(Irp);
    (void)IoSetCancelRoutine(Irp, AfdCancelHandler);
    SocketStateUnlock(FCB);

    return STATUS_PENDING;
}

 

如上,当应用程序调用RecvFrom向套接字发出收包请求时,afd驱动会检查那个套接字的收包队列中是否有包,若有,就满足请求立即返回,否则,将irp挂入队列等待以后完成该irp请求。那么,什么时候该irp会满足完成呢?要想得到这个问题的答案,回顾一下,当网卡收到包时,会触发中断进入小端口驱动的isr,最后小端口驱动会将包上交给各个绑定协议。当上交给tcpip时,tcpip会根据报文头部的协议、目标ip地址、目标端口号找到所有符合条件的传输层地址对象,然后调用DGDeliverData函数将报文投递给那些符合条件的地址对象。

/*
 * DGDeliverData - deliver a received datagram to a matching address file.
 *
 * AddrFile   - address file object selected as a destination for the packet.
 * SrcAddress - source address from the packet header.
 * DstAddress - destination address from the packet header.
 * SrcPort    - source port from the packet header.
 * DstPort    - destination port from the packet header.
 * IPPacket   - packet handed up by the lower layer.
 * DataSize   - length of the data.
 *
 * The first queued receive request that matches is removed, filled with at
 * most BufferSize bytes plus the sender's address, and completed. If no
 * receive request is queued the datagram is dropped.
 */
VOID DGDeliverData(
  PADDRESS_FILE AddrFile,
  PIP_ADDRESS SrcAddress,
  PIP_ADDRESS DstAddress,
  USHORT SrcPort,
  USHORT DstPort,
  PIP_PACKET IPPacket,
  UINT DataSize)
{
  KIRQL OldIrql;
  PVOID DataBuffer;
  PLIST_ENTRY CurrentEntry;
  PDATAGRAM_RECEIVE_REQUEST Current;
  PTA_IP_ADDRESS RTAIPAddress;

  LockObject(AddrFile, &OldIrql);

  /* Choose which part of the packet the receiver gets to see. */
  if (AddrFile->Protocol == IPPROTO_UDP) {
      DataBuffer = IPPacket->Data;
  } else if (AddrFile->HeaderIncl) {
      DataBuffer = IPPacket->Header;
  } else {
      DataBuffer = IPPacket->Data;
      DataSize -= IPPacket->HeaderSize;
  }

  if (IsListEmpty(&AddrFile->ReceiveQueue)) {
      /*
       * No receive request is queued on this address object, so the packet
       * is dropped. This is one reason UDP is unreliable: even on a perfect
       * link, packets are lost when the receiver cannot keep up with the
       * sender. (The original listing elides the rest of this branch.)
       */
      UnlockObject(AddrFile, OldIrql);
      return;
  }

  /* The receive queue is not empty: satisfy one request with this packet. */
  CurrentEntry = AddrFile->ReceiveQueue.Flink;
  while (CurrentEntry != &AddrFile->ReceiveQueue) {
      Current = CONTAINING_RECORD(CurrentEntry, DATAGRAM_RECEIVE_REQUEST, ListEntry);
      CurrentEntry = CurrentEntry->Flink;

      if (!(DstPort == AddrFile->Port &&
            (AddrIsEqual(DstAddress, &AddrFile->Address) ||
             AddrIsUnspecified(&AddrFile->Address) ||
             AddrIsUnspecified(DstAddress))))
          continue;

      /* Take the request off the queue and copy as much data as fits. */
      RemoveEntryList(&Current->ListEntry);
      RtlCopyMemory(Current->Buffer, DataBuffer, MIN(Current->BufferSize, DataSize));

      /* Report the sender's address and port back to the requester. */
      RTAIPAddress = (PTA_IP_ADDRESS)Current->ReturnInfo->RemoteAddress;
      RTAIPAddress->TAAddressCount = 1;
      RTAIPAddress->Address->AddressType = TDI_ADDRESS_TYPE_IP;
      RTAIPAddress->Address->Address->sin_port = SrcPort;
      RtlCopyMemory(&RTAIPAddress->Address->Address->in_addr,
                    &SrcAddress->Address.IPv4Address,
                    sizeof(SrcAddress->Address.IPv4Address));

      /*
       * Release the lock before completing: the completion path can post
       * the next receive request, which locks this address file again.
       */
      UnlockObject(AddrFile, OldIrql);

      /*
       * Key step: run the request's completion function. Completing one
       * request is what lets the upper layer immediately issue the next
       * one, so the transport keeps being supplied with receive requests.
       */
      if (Current->BufferSize < DataSize)
          Current->Complete(Current->Context, STATUS_BUFFER_OVERFLOW, Current->BufferSize);
      else
          Current->Complete(Current->Context, STATUS_SUCCESS, DataSize);

      /* Only one receive request is satisfied per packet. */
      return;
  }

  UnlockObject(AddrFile, OldIrql);
}

 

看到没,传输层一收到udp报文,就会把报文满足给接收请求。每个地址对象内部维护着一个接收请求队列,当udp套接字一绑定地址对象时,就会立马向传输层发出一个接收请求,挂入相应地址对象的接收请求队列中,这就是第一个udp接收请求的产生时机(第一个接收请求是在AfdBindSocket函数内部调用TdiReceiveDatagram发出的)

 

回顾一下绑定过程:

/*
 * AfdBindSocket (excerpt; the code before and after the fragment is elided
 * with "。。。"). For a connectionless endpoint it posts a datagram receive
 * request to the transport via TdiReceiveDatagram and treats STATUS_PENDING
 * as success.
 */
AfdBindSocket(PDEVICE_OBJECT DeviceObject, PIRP Irp,PIO_STACK_LOCATION
IrpSp)

{   。。。

if( FCB->Flags & AFD_ENDPOINT_CONNECTIONLESS
)

{

     Status
= TdiReceiveDatagram

         ( &FCB->ReceiveIrp.InFlightRequest,// InFlightRequest holds the IRP currently generated; it is reused

FCB->AddressFile.Object,// transport-layer address object the request is sent to

0,// flags

           FCB->Recv.Window,// UDP receive buffer: an intermediate buffer that is reused

FCB->Recv.Size,// per the original note, always 16384 bytes by default

FCB->AddressFrom,  &FCB->ReceiveIrp.Iosb,

PacketSocketRecvComplete,FCB );// key point: the completion routine and its context

     // A pending receive request is the expected outcome, so report success.
     if(
Status == STATUS_PENDING
) Status = STATUS_SUCCESS;

}

。。。

}

如上,刚一完成绑定,就立即向传输层对应的地址对象投递一个接收请求

 

/*
 * TdiReceiveDatagram - build a TDI_RECEIVE_DATAGRAM IRP and send it to the
 * transport layer.
 *
 * Irp               - receives the newly built TDI IRP (set to NULL when
 *                     building fails after the IRP was allocated).
 * TransportObject   - file object of the transport-layer address.
 * Flags             - receive flags passed to TdiBuildReceiveDatagram.
 * Buffer            - receive buffer, described by a newly allocated MDL.
 * BufferLength      - length of Buffer in bytes.
 * Addr              - "from" address information.
 * Iosb              - I/O status block for the request.
 * CompletionRoutine - routine invoked when the IRP completes.
 * CompletionContext - context passed to CompletionRoutine.
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES when the IRP or MDL cannot be
 * allocated or the buffer pages cannot be locked, otherwise the status
 * returned by TdiCall.
 */
NTSTATUS TdiReceiveDatagram(
    PIRP *Irp,
    PFILE_OBJECT TransportObject,
    USHORT Flags,
    PCHAR Buffer,
    UINT BufferLength,
    PTDI_CONNECTION_INFORMATION Addr,
    PIO_STATUS_BLOCK Iosb,
    PIO_COMPLETION_ROUTINE CompletionRoutine,
    PVOID CompletionContext)
{
    PDEVICE_OBJECT DeviceObject;
    NTSTATUS Status;
    PMDL Mdl;

    /* Per the original note, this resolves to the UDP device (Device/Udp). */
    DeviceObject = IoGetRelatedDeviceObject(TransportObject);

    *Irp = TdiBuildInternalDeviceControlIrp(TDI_RECEIVE_DATAGRAM, DeviceObject,
                                            TransportObject, NULL, Iosb);
    if (*Irp == NULL)
        return STATUS_INSUFFICIENT_RESOURCES;

    Mdl = IoAllocateMdl(Buffer, BufferLength, FALSE, FALSE, NULL);
    if (Mdl == NULL) {
        /* Same cleanup as the page-locking failure path below. */
        IoCompleteRequest(*Irp, IO_NO_INCREMENT);
        *Irp = NULL;
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    /* Locking the buffer pages may raise an exception; undo everything then. */
    _SEH2_TRY {
        MmProbeAndLockPages(Mdl, (*Irp)->RequestorMode, IoModifyAccess);
    } _SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER) {
        IoFreeMdl(Mdl);
        IoCompleteRequest(*Irp, IO_NO_INCREMENT);
        *Irp = NULL;
        _SEH2_YIELD(return STATUS_INSUFFICIENT_RESOURCES);
    } _SEH2_END;

    TdiBuildReceiveDatagram(*Irp, DeviceObject, TransportObject,
                            CompletionRoutine, CompletionContext,
                            Mdl, BufferLength, Addr, Addr, Flags);

    /* Send the IRP down to the transport layer. */
    Status = TdiCall(*Irp, DeviceObject, NULL, Iosb);

    return Status;
}

 

看看传输层是如何处理afd层发下来的udp报文接收请求的

/*
 * DispTdiReceiveDatagram - transport-layer handler for a UDP receive request
 * IRP sent down by AFD.
 *
 * Prepares the IRP for cancellation, looks up the receive buffer behind the
 * IRP's MDL and passes the request to DGReceiveDatagram. The IRP is marked
 * pending when the result is STATUS_PENDING and completed through
 * DispDataRequestComplete otherwise. Returns that status.
 */
NTSTATUS DispTdiReceiveDatagram(PIRP Irp)
{
  PIO_STACK_LOCATION Stack = IoGetCurrentIrpStackLocation(Irp);
  PTDI_REQUEST_KERNEL_RECEIVEDG Params = (PTDI_REQUEST_KERNEL_RECEIVEDG)&(Stack->Parameters);
  PTRANSPORT_CONTEXT Transport = Stack->FileObject->FsContext;
  TDI_REQUEST Request;
  NTSTATUS Status;
  ULONG BytesReceived = 0;

  /* Initialize a receive request */
  Request.Handle.AddressHandle = Transport->Handle.AddressHandle;
  Request.RequestNotifyObject = DispDataRequestComplete;
  Request.RequestContext = Irp;

  Status = DispPrepareIrpForCancel(Stack->FileObject->FsContext, Irp,
                                   (PDRIVER_CANCEL)DispCancelRequest);
  if (NT_SUCCESS(Status))
  {
      PVOID Window;   /* per the original note, this is AFD's receive window */
      UINT WindowSize;

      NdisQueryBuffer((PNDIS_BUFFER)Irp->MdlAddress, &Window, &WindowSize);

      /* Creates a receive request (not an IRP) and queues it on the address object. */
      Status = DGReceiveDatagram(
          Request.Handle.AddressHandle,
          Params->ReceiveDatagramInformation,
          Window,
          Params->ReceiveLength,
          Params->ReceiveFlags,
          Params->ReturnDatagramInformation,
          &BytesReceived,
          (PDATAGRAM_COMPLETION_ROUTINE)DispDataRequestComplete,
          Irp,
          Irp);
  }

  if (Status == STATUS_PENDING)
      IoMarkIrpPending(Irp);
  else
      DispDataRequestComplete(Irp, Status, BytesReceived);

  return Status;
}

 

实际的处理工作在DGReceiveDatagram函数中,我们看

/*
 * DGReceiveDatagram - queue a datagram receive request on an address file.
 *
 * AddrFile      - address object whose receive queue gets the new request.
 * ConnInfo      - optional remote address stored in the request.
 * BufferData    - buffer that will receive the datagram.
 * ReceiveLength - size of BufferData in bytes.
 * ReceiveFlags  - not used by this routine.
 * ReturnInfo    - where the sender's address is reported on completion.
 * BytesReceived - not written by this routine.
 * Complete      - caller's completion routine.
 * Context       - context passed to Complete.
 * Irp           - IRP backing the request; marked pending once it is queued.
 *
 * Returns STATUS_PENDING when the request was queued,
 * STATUS_INSUFFICIENT_RESOURCES when the request cannot be allocated, or the
 * failure status of AddrGetAddress.
 */
NTSTATUS DGReceiveDatagram(
    PADDRESS_FILE AddrFile,
    PTDI_CONNECTION_INFORMATION ConnInfo,
    PCHAR BufferData,
    ULONG ReceiveLength,
    ULONG ReceiveFlags,
    PTDI_CONNECTION_INFORMATION ReturnInfo,
    PULONG BytesReceived,
    PDATAGRAM_COMPLETION_ROUTINE Complete,
    PVOID Context,
    PIRP Irp)
{
    NTSTATUS Status;
    PDATAGRAM_RECEIVE_REQUEST ReceiveRequest;
    KIRQL OldIrql;

    LockObject(AddrFile, &OldIrql);

    /* Key step: allocate the receive request (this is not an IRP). The tag
     * matches the one DGReceiveComplete frees it with. */
    ReceiveRequest = ExAllocatePoolWithTag(NonPagedPool,
                                           sizeof(DATAGRAM_RECEIVE_REQUEST),
                                           DATAGRAM_RECV_TAG);
    if (ReceiveRequest == NULL) {
        UnlockObject(AddrFile, OldIrql);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    if ((ConnInfo->RemoteAddressLength != 0) && (ConnInfo->RemoteAddress)) {
        /* A remote address was supplied: extract it into the request. */
        Status = AddrGetAddress(ConnInfo->RemoteAddress,
                                &ReceiveRequest->RemoteAddress,
                                &ReceiveRequest->RemotePort);
        if (!NT_SUCCESS(Status)) {
            ExFreePoolWithTag(ReceiveRequest, DATAGRAM_RECV_TAG);
            UnlockObject(AddrFile, OldIrql);
            return Status;
        }
    } else {
        /* No remote address: store port 0 and the zero IPv4 address. */
        ReceiveRequest->RemotePort = 0;
        AddrInitIPv4(&ReceiveRequest->RemoteAddress, 0);
    }

    IoMarkIrpPending(Irp);

    ReceiveRequest->ReturnInfo = ReturnInfo;
    ReceiveRequest->Buffer = BufferData;
    ReceiveRequest->BufferSize = ReceiveLength;
    ReceiveRequest->UserComplete = Complete;   /* DispDataRequestComplete in this path */
    ReceiveRequest->UserContext = Context;     /* the IRP in this path */
    ReceiveRequest->Complete = (PDATAGRAM_COMPLETION_ROUTINE)DGReceiveComplete;
    ReceiveRequest->Context = ReceiveRequest;
    ReceiveRequest->AddressFile = AddrFile;
    ReceiveRequest->Irp = Irp;

    /* Key step: append the request to the address object's receive queue. */
    InsertTailList(&AddrFile->ReceiveQueue, &ReceiveRequest->ListEntry);

    UnlockObject(AddrFile, OldIrql);

    return STATUS_PENDING;
}

 

前面我们看到,每当满足一个接收请求后,会调用它的完成函数,以继续发出接收请求。我们看看那个完成函数是不是这样做的。

/*
 * DGReceiveComplete - completion function of a datagram receive request.
 *
 * Context is the DATAGRAM_RECEIVE_REQUEST itself. Forwards Status and Count
 * to the routine stored in UserComplete (DispDataRequestComplete in the
 * receive path shown here), then frees the request.
 */
VOID DGReceiveComplete(PVOID Context, NTSTATUS Status, ULONG Count)
{
    PDATAGRAM_RECEIVE_REQUEST Request = (PDATAGRAM_RECEIVE_REQUEST)Context;

    Request->UserComplete(Request->UserContext, Status, Count);

    ExFreePoolWithTag(Request, DATAGRAM_RECV_TAG);
}

 

/*
 * DispDataRequestComplete - record the outcome of a data request in its IRP
 * and finish the IRP.
 *
 * Context - the IRP being completed.
 * Status  - completion status stored in the IRP.
 * Count   - byte count stored in IoStatus.Information.
 */
VOID DispDataRequestComplete(
    PVOID Context,
    NTSTATUS Status,
    ULONG Count)
{
    PIRP Request = (PIRP)Context;

    Request->IoStatus.Information = Count;
    Request->IoStatus.Status = Status;

    IRPFinish(Request, Status);
}

 

/*
 * IRPFinish - store Status in the IRP and either mark it pending or
 * complete it.
 *
 * For STATUS_PENDING the IRP is only marked pending. For any other status
 * the cancel routine is cleared under the cancel spin lock and the IRP is
 * completed with IO_NETWORK_INCREMENT. Returns Status unchanged.
 */
NTSTATUS IRPFinish( PIRP Irp, NTSTATUS Status )
{
    KIRQL SavedIrql;

    Irp->IoStatus.Status = Status;

    if (Status == STATUS_PENDING) {
        IoMarkIrpPending(Irp);
        return Status;
    }

    /* Detach the cancel routine before the IRP is completed. */
    IoAcquireCancelSpinLock(&SavedIrql);
    (void)IoSetCancelRoutine(Irp, NULL);
    IoReleaseCancelSpinLock(SavedIrql);

    /* Key step: complete the IRP. */
    IoCompleteRequest(Irp, IO_NETWORK_INCREMENT);

    return Status;
}

 

IoCompleteRequest中会调用最初设置的完成例程,即PacketSocketRecvComplete函数

/*
 * PacketSocketRecvComplete - completion routine that runs each time a UDP
 * receive request posted to the transport layer has completed.
 *
 * Copies the datagram out of the shared receive window into a newly
 * allocated AFD_STORED_DATAGRAM, appends it to the socket's datagram list,
 * satisfies any receive IRPs waiting on the socket, updates the poll state
 * and posts the next receive request to the transport layer.
 *
 * DeviceObject - not used.
 * Irp          - the completed transport receive IRP.
 * Context      - the socket's AFD_FCB.
 *
 * Returns STATUS_FILE_CLOSED when the socket lock cannot be taken or the
 * socket is closed, otherwise STATUS_SUCCESS.
 */
NTSTATUS
PacketSocketRecvComplete(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID Context)
{
    NTSTATUS Status = STATUS_SUCCESS;
    PAFD_FCB FCB = Context;
    PIRP NextIrp;
    PIO_STACK_LOCATION NextIrpSp;
    PLIST_ENTRY ListEntry;
    PAFD_RECV_INFO RecvReq;
    PAFD_STORED_DATAGRAM DatagramRecv;
    UINT DGSize = Irp->IoStatus.Information + sizeof(AFD_STORED_DATAGRAM);

    if (!SocketAcquireStateLock(FCB))
        return STATUS_FILE_CLOSED;

    /* The in-flight transport IRP is done. */
    FCB->ReceiveIrp.InFlightRequest = NULL;

    if (FCB->State == SOCKET_STATE_CLOSED) {
        /*
         * NOTE(review): the original listing elides this branch ("..."); the
         * cleanup of queued IRPs and stored datagrams belongs here. Bail out
         * so a closed socket is not touched below - confirm against the full
         * driver source.
         */
        SocketStateUnlock(FCB);
        return STATUS_FILE_CLOSED;
    }

    /* Key step: allocate the AFD_STORED_DATAGRAM kept in the socket's queue. */
    DatagramRecv = ExAllocatePool(NonPagedPool, DGSize);
    if (DatagramRecv != NULL) {
        DatagramRecv->Len = Irp->IoStatus.Information;

        /* Every datagram lands in Window first; Window is only a staging area. */
        RtlCopyMemory(DatagramRecv->Buffer, FCB->Recv.Window, DatagramRecv->Len);

        DatagramRecv->Address = TaCopyTransportAddress(FCB->AddressFrom->RemoteAddress);

        /* Key step: queue the datagram so a later application receive request
         * can be served straight from this list. */
        InsertTailList(&FCB->DatagramList, &DatagramRecv->ListEntry);
    }
    /* else: out of memory - this datagram is dropped, but the receive
     * request is still re-posted below so reception continues. */

    /* Serve receive IRPs that are already waiting on this socket. */
    while (!IsListEmpty(&FCB->DatagramList) &&
           !IsListEmpty(&FCB->PendingIrpList[FUNCTION_RECV])) {
        ListEntry = RemoveHeadList(&FCB->DatagramList);
        DatagramRecv = CONTAINING_RECORD(ListEntry, AFD_STORED_DATAGRAM, ListEntry);

        ListEntry = RemoveHeadList(&FCB->PendingIrpList[FUNCTION_RECV]);
        NextIrp = CONTAINING_RECORD(ListEntry, IRP, Tail.Overlay.ListEntry);
        NextIrpSp = IoGetCurrentIrpStackLocation(NextIrp);
        RecvReq = NextIrpSp->Parameters.DeviceIoControl.Type3InputBuffer;

        if (DatagramRecv->Len > RecvReq->BufferArray[0].len &&
            !(RecvReq->TdiFlags & TDI_RECEIVE_PARTIAL)) {
            /* Truncation is not allowed: put the datagram back, fail the IRP. */
            InsertHeadList(&FCB->DatagramList, &DatagramRecv->ListEntry);
            Status = NextIrp->IoStatus.Status = STATUS_BUFFER_TOO_SMALL;
            NextIrp->IoStatus.Information = DatagramRecv->Len;
        } else {
            Status = SatisfyPacketRecvRequest(FCB, NextIrp, DatagramRecv,
                                              (PUINT)&NextIrp->IoStatus.Information);
        }

        UnlockBuffers(RecvReq->BufferArray, RecvReq->BufferCount, TRUE);
        if (NextIrp->MdlAddress)
            UnlockRequest(NextIrp, IoGetCurrentIrpStackLocation(NextIrp));

        /* Queued IRPs carry a cancel routine (set in LeaveIrpUntilLater);
         * clear it on both paths before completing. */
        (void)IoSetCancelRoutine(NextIrp, NULL);
        IoCompleteRequest(NextIrp, IO_NETWORK_INCREMENT);
    }

    /* Keep the "readable" poll state in step with the queue contents. */
    if (!IsListEmpty(&FCB->DatagramList)) {
        FCB->PollState |= AFD_EVENT_RECEIVE;
        FCB->PollStatus[FD_READ_BIT] = STATUS_SUCCESS;
        PollReeval(FCB->DeviceExt, FCB->FileObject);
    } else {
        FCB->PollState &= ~AFD_EVENT_RECEIVE;
    }

    /*
     * Key step: immediately post another receive request to the transport
     * layer. The transport drops a packet that arrives while no receive
     * request is queued, so AFD re-arms as soon as one datagram has arrived.
     */
    if (NT_SUCCESS(Irp->IoStatus.Status)) {
        Status = TdiReceiveDatagram(&FCB->ReceiveIrp.InFlightRequest,
                                    FCB->AddressFile.Object,
                                    0,
                                    FCB->Recv.Window,
                                    FCB->Recv.Size,
                                    FCB->AddressFrom,
                                    &FCB->ReceiveIrp.Iosb,
                                    PacketSocketRecvComplete,
                                    FCB);
    }

    SocketStateUnlock(FCB);

    return STATUS_SUCCESS;
}

 

 

 

 

总结一下协议驱动、小端口驱动之间的交互步骤:

1、  系统启动时加载ndis.sys模块,建立起ndis基础运行环境

2、  安装加载各种协议驱动,在DriverEntry中注册协议特征,即各种回调函数

3、  安装网卡,加载小端口驱动,在DriverEntry中注册小端口特征,即各种回调函数

4、  进入小端口驱动的AddDevice,ndis自动为我们创建一个小端口设备对象,加入设备栈

5、  系统为这个网卡分配端口、中断号等资源

6、  启动网卡设备,进入小端口驱动注册的初始化例程

7、  在小端口驱动的初始化例程中:初始化硬件寄存器、注册中断向量、分配自定义设备扩展等典型工作

8、  网卡启动初始化完毕后,ndis框架调用各协议驱动提供的绑定回调函数,通知绑定

9、  进入各协议驱动提供的绑定回调函数,我们要调用NdisOpenAdapter打开网卡进行绑定

10、Ndis框架调用绑定完成回调函数 或 我们自己手动模拟调用

11、网卡收到一个数据包,触发中断,进入ndis托管的isr

12、托管Isr进入我们注册的isr和后半部

13、我们的isr调用NdisMEthIndicateReceive这个宏,调用各绑定协议提供的接收回调函数,向上提交

14、进入各个协议的接收回调函数(ReceivePacketHandler或ReceiveHandler)

15、后面怎么处理收到的包自行决定

 

 

打赏