简体   繁体   中英

Use line and column numbers to map a source code variable back to its definition in a structure variable in c

Suppose I have a C program like the one below: the structs node_s and list_s are defined, and list_get_first is a function that returns the data of the list's first element.

// A node holding an untyped data pointer plus links to two other nodes of
// the same type. Also available under the typedef name Node.
typedef struct node_s {
    void          *data;   // payload pointer; the pointee type is not known here
    struct node_s *next;   // link to another node_s
    struct node_s *prev;   // link to another node_s
} Node;

// List header: an element count plus pointers to two Node objects.
struct list_s {
    size_t  size;   // list_get_first treats size == 0 as "nothing to return"
    Node   *head;   // node whose data list_get_first hands back
    Node   *tail;   // presumably the last node of the list; not used in this snippet
};

// Stores the data pointer of the node at list->head into *out.
//
// Returns -1, leaving *out untouched, when list->size is 0; returns 1 after
// writing *out otherwise. Neither list nor out is checked for NULL.
//
// The trailing "expr: line:col" comments below are the source positions the
// question asks about; they are annotations, not part of the logic.
//
// NOTE(review): CC_List is not declared in this snippet; presumably it is a
// typedef of struct list_s -- confirm against the full source.
int list_get_first(CC_List *list, void **out)
{
    if (list->size == 0)                       // list->size: 9:14
        return -1;

    *out = list->head->data;                   // list->head->data: 12:13
    return 1;
}
...
  

Suppose I can get the debug information at runtime for list->size: 9:14 and list->head->data: 12:13. Is there a simple way to map that debug information back to its definition, with the structure name and the associated field name, automatically? I'm using clang and llvm for compilation and code analysis.

You can write an LLVM pass to do this job.

  1. First, make sure you generate debug information using '-g' in clang and emit LLVM IR using '-S -emit-llvm'
  2. Create a custom LLVM pass, try to find all the variables you need.
  3. For each load instruction, find the type definitions of those variables by looking up the debug information attached to them.
  4. Create a map for each variable to its definition in struct.

The following function shows where the information you are interested in is stored in the metadata. You can extract it and build the map for each variable.

/// Dumps, for every load instruction in every defined function of \p M, the
/// metadata that ties the load back to source-level types:
///   - the load's !dbg attachment (its source location),
///   - the debug-info type tree of the enclosing function's signature,
///   - the first three operands of the load's !tbaa tag.
/// Functions that are only declared are skipped. Nothing is returned; all
/// results are emitted through the LLVM dump helpers.
static void findStructType(Module &M) {
  LLVM_DEBUG(M.dump());
  for (auto &F : M) {
    if (F.isDeclaration()) continue;

    // The function-level !dbg attachment is the same for every instruction in
    // F, so resolve it once per function instead of once per load.
    DISubprogram *subprog = nullptr;
    if (F.hasMetadata("dbg"))
      subprog = dyn_cast<DISubprogram>(F.getMetadata("dbg"));

    for (auto &BB : F) {
      for (auto &I : BB) {
        if (I.getOpcode() != Instruction::Load) continue;

        if (I.hasMetadata("dbg")) {
          // This gives the source location of this load. dyn_cast yields
          // null on a type mismatch, so test the result before using it.
          if (auto *loc = dyn_cast<DILocation>(I.getMetadata("dbg")))
            loc->dump();
        }

        // The signature's type tree holds the structure definitions that are
        // reachable from the function's parameters and return type.
        if (subprog) {
          if (auto *type = subprog->getType())
            type->dumpTree();
        }

        // Use TBAA to get the type accessed by this load. getMetadata already
        // returns an MDNode*, so no cast is needed.
        if (MDNode *tbaa = I.getMetadata("tbaa")) {
          if (tbaa->getNumOperands() >= 3) {
            tbaa->getOperand(0)->dump(); // the struct type
            tbaa->getOperand(1)->dump(); // what is loading
            tbaa->getOperand(2)->dump(); // offset
          } else {
            // A tag with fewer operands cannot be split into base type /
            // access type / offset; dump it whole rather than reading past
            // the last operand.
            tbaa->dump();
          }
        }
      }
    }
  }
}

This is what I can get from your example code (only the first load instruction):

<0x55e7875d1470> = !DISubroutineType(types: <0x55e787514828>)
  <0x55e787514828> = !{<0x55e7875cfb58>, <0x55e7875d1380>, <0x55e7875d1400>}
    <0x55e7875cfb58> = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
    <0x55e7875d1380> = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: <0x55e7875d0230>, size: 64)
      <0x55e7875d0230> = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "list_s", file: <0x55e7875cc850>, line: 9, size: 192, elements: <0x55e78755c878>)
        <0x55e7875cc850> = !DIFile(filename: "example/dwarf/p1.c", directory: "/home/riple/IPRA-exp", checksumkind: CSK_MD5, checksum: "b2e16825badf1eb53bc8cb87df801efe")
        <0x55e78755c878> = !{<0x55e7875147c0>, <0x55e7875d1280>, <0x55e7875d1300>}
          <0x55e7875147c0> = !DIDerivedType(tag: DW_TAG_member, name: "size", scope: <0x55e7875d0230>, file: <0x55e7875cc850>, line: 10, baseType: <0x55e7875d0800>, size: 64)
            <0x55e7875d0800> = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: <0x55e7875d0780>, line: 46, baseType: <0x55e7875d06c8>)
              <0x55e7875d0780> = !DIFile(filename: "install/llvm/lib/clang/16.0.0/include/stddef.h", directory: "/home/riple/IPRA-exp", checksumkind: CSK_MD5, checksum: "89af46ad12914219411a5eaafda2889f")
              <0x55e7875d06c8> = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned)
          <0x55e7875d1280> = !DIDerivedType(tag: DW_TAG_member, name: "head", scope: <0x55e7875d0230>, file: <0x55e7875cc850>, line: 11, baseType: <0x55e7875d1200>, size: 64, offset: 64)
            <0x55e7875d1200> = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: <0x55e7875d1180>, size: 64)
              <0x55e7875d1180> = !DIDerivedType(tag: DW_TAG_typedef, name: "Node", file: <0x55e7875cc850>, line: 7, baseType: <0x55e7875d0c70>)
                <0x55e7875d0c70> = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "node_s", file: <0x55e7875cc850>, line: 3, size: 192, elements: <0x55e787593388>)
                  <0x55e787593388> = !{<0x55e7875d0d70>, <0x55e7875d0e70>, <0x55e7875d0ef0>}
                    <0x55e7875d0d70> = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: <0x55e7875d0c70>, file: <0x55e7875cc850>, line: 4, baseType: <0x55e7875d0cf0>, size: 64)
                      <0x55e7875d0cf0> = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
                    <0x55e7875d0e70> = !DIDerivedType(tag: DW_TAG_member, name: "next", scope: <0x55e7875d0c70>, file: <0x55e7875cc850>, line: 5, baseType: <0x55e7875d0df0>, size: 64, offset: 64)
                      <0x55e7875d0df0> = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: <0x55e7875d0c70>, size: 64)
                    <0x55e7875d0ef0> = !DIDerivedType(tag: DW_TAG_member, name: "prev", scope: <0x55e7875d0c70>, file: <0x55e7875cc850>, line: 6, baseType: <0x55e7875d0df0>, size: 64, offset: 128)
          <0x55e7875d1300> = !DIDerivedType(tag: DW_TAG_member, name: "tail", scope: <0x55e7875d0230>, file: <0x55e7875cc850>, line: 12, baseType: <0x55e7875d1200>, size: 64, offset: 128)
    <0x55e7875d1400> = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: <0x55e7875d0cf0>, size: 64)
<0x55e7875d7878> = !{!"list_s", <0x55e7875d7628>, i64 0, <0x55e7875d36d8>, i64 8, <0x55e7875d36d8>, i64 16}
<0x55e7875d7628> = !{!"long", <0x55e7875d3698>, i64 0}
i64 0

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM