■ ■ ■ ■ ■ ■
sub3suite/src/modules/passive/AbstractOsintModule.h
| skipped 3 lines |
4 | 4 | | #include <QObject> |
5 | 5 | | #include <QThread> |
6 | 6 | | #include <QQueue> |
| 7 | + | #include <QStack> |
7 | 8 | | #include <QNetworkReply> |
8 | 9 | | #include <QNetworkRequest> |
9 | 10 | | |
| skipped 91 lines |
101 | 102 | | }; |
102 | 103 | | |
103 | 104 | | /// |
104 | | - | /// getting html body node... |
| 105 | + | /// Commonly used gumbo-parser methods... |
105 | 106 | | /// |
106 | 107 | | |
107 | | - | GumboNode* getBody(GumboNode *node) { |
108 | | - | for(unsigned int i = 0; i < node->v.element.children.length; i++) |
| 108 | + | class GumboMethods { |
| 109 | + | public: |
| 110 | + | GumboMethods() |
| 111 | + | {} |
| 112 | + | ~GumboMethods() |
| 113 | + | {} |
| 114 | + | |
| 115 | + | /* getting html <body> node */ |
| 116 | + | static GumboNode* getBody(GumboNode *node) { |
| 117 | + | for(unsigned int i = 0; i < node->v.element.children.length; i++) |
| 118 | + | { |
| 119 | + | GumboNode *child = static_cast<GumboNode*>(node->v.element.children.data[i]); |
| 120 | + | if(child->type == GUMBO_NODE_ELEMENT && child->v.element.tag == GUMBO_TAG_BODY) |
| 121 | + | return child; |
| 122 | + | } |
| 123 | + | return nullptr; |
| 124 | + | } |
| 125 | + | |
| 126 | + | /* getting document's title */ |
| 127 | + | static QString getTitle(QNetworkReply *reply) { |
| 128 | + | QString title; |
| 129 | + | QStack<GumboNode*> node_stack; |
| 130 | + | GumboOutput *output = gumbo_parse(reply->readAll()); |
| 131 | + | node_stack.push(GumboMethods::getBody(output->root)); |
| 132 | + | |
| 133 | + | GumboNode *node; |
| 134 | + | while(!node_stack.isEmpty()) |
| 135 | + | { |
| 136 | + | node = node_stack.pop(); |
| 137 | + | if (node->type != GUMBO_NODE_ELEMENT) |
| 138 | + | continue; |
| 139 | + | |
| 140 | + | if (node->v.element.tag == GUMBO_TAG_H1){ |
| 141 | + | GumboNode* title_text = static_cast<GumboNode*>(node->v.element.children.data[0]); |
| 142 | + | title = QString::fromUtf8(title_text->v.text.text); |
| 143 | + | break; |
| 144 | + | } |
| 145 | + | |
| 146 | + | GumboVector *children = &node->v.element.children; |
| 147 | + | for(unsigned int i = 0; i < children->length; i++) |
| 148 | + | node_stack.push(static_cast<GumboNode*>(children->data[i])); |
| 149 | + | } |
| 150 | + | |
| 151 | + | gumbo_destroy_output(&kGumboDefaultOptions, output); |
| 152 | + | return title; |
| 153 | + | } |
| 154 | + | |
| 155 | + | /* getting links from html document */ |
| 156 | + | static QSet<QString> getLinks(QNetworkReply *reply) |
109 | 157 | | { |
110 | | - | GumboNode *child = static_cast<GumboNode*>(node->v.element.children.data[i]); |
111 | | - | if(child->type == GUMBO_NODE_ELEMENT && child->v.element.tag == GUMBO_TAG_BODY) |
112 | | - | return child; |
| 158 | + | QSet<QString> links; |
| 159 | + | QStack<GumboNode*> node_stack; |
| 160 | + | GumboOutput *output = gumbo_parse(reply->readAll()); |
| 161 | + | node_stack.push(GumboMethods::getBody(output->root)); |
| 162 | + | |
| 163 | + | GumboNode *node; |
| 164 | + | while(!node_stack.isEmpty()) |
| 165 | + | { |
| 166 | + | node = node_stack.pop(); |
| 167 | + | if (node->type != GUMBO_NODE_ELEMENT) |
| 168 | + | continue; |
| 169 | + | |
| 170 | + | if (node->v.element.tag == GUMBO_TAG_A){ |
| 171 | + | GumboAttribute* href = gumbo_get_attribute(&node->v.element.attributes, "href"); |
| 172 | + | if (href) |
| 173 | + | links.insert(QString::fromUtf8(href->value)); |
| 174 | + | continue; |
| 175 | + | } |
| 176 | + | |
| 177 | + | GumboVector *children = &node->v.element.children; |
| 178 | + | for(unsigned int i = 0; i < children->length; i++) |
| 179 | + | node_stack.push(static_cast<GumboNode*>(children->data[i])); |
| 180 | + | } |
| 181 | + | |
| 182 | + | gumbo_destroy_output(&kGumboDefaultOptions, output); |
| 183 | + | return links; |
113 | 184 | | } |
114 | | - | return nullptr; |
115 | | - | } |
| 185 | + | |
| 186 | + | private: |
| 187 | + | Q_DISABLE_COPY(GumboMethods) |
| 188 | + | |
| 189 | + | }; |
116 | 190 | | |
117 | 191 | | /// |
118 | 192 | | /// \brief The AbstractOsintModule class |
| skipped 215 lines |