■ ■ ■ ■ ■ ■
maryam/core/util/engines/google.py
| skipped 84 lines |
85 | 85 | | card_xpath_name = { |
86 | 86 | | 'card': '//div[@id="wp-tabs-container"]', |
87 | 87 | | 'card_content': './/div[@class="kno-rdesc"]', |
88 | | - | 'card_info': './/div[@class="rVusze"]' |
| 88 | + | 'card_info': './/div[@class="rVusze"]||' |
89 | 89 | | } |
90 | 90 | | xpath = { |
91 | 91 | | card_xpath_name['card']: [ |
| skipped 27 lines |
119 | 119 | | xpath_results = parser.html_fromstring(xpath) |
120 | 120 | | xpath_results2 = parser.html_fromstring(xpath2) |
121 | 121 | | xpath_results3 = parser.html_fromstring(xpath3) |
| 122 | + | |
122 | 123 | | output = {'content': '', 'info': []} |
| 124 | + | |
| 125 | + | content = parser.html_fromstring('.//div[@class="kno-rdesc"]') |
| 126 | + | if content: |
| 127 | + | output['content'] = content[0].text_content() |
| 128 | + | if output['content'].startswith('Description'): |
| 129 | + | output['content'] = output['content'].replace('Description', '') |
| 130 | + | |
| 131 | + | for i in parser.html_fromstring('.//div[@class="rVusze"]'): |
| 132 | + | if i.text_content(): |
| 133 | + | output['info'].append(i.text_content().replace('\xa0', ' ')) |
| 134 | + | |
123 | 135 | | root = xpath_results[card_xpath_name['card']] |
124 | 136 | | root2 = xpath_results2[card_xpath_second['card']] |
125 | 137 | | root3 = xpath_results3[card_xpath_social['card']] |
126 | | - | if root[card_xpath_name['card_content']]: |
| 138 | + | if root[card_xpath_name['card_content']] and output['content'] == '': |
127 | 139 | | output['content'] = root[card_xpath_name['card_content']][0].text_content() |
128 | 140 | | else: |
129 | 141 | | card_xpath_name = { |
| skipped 9 lines |
139 | 151 | | } |
140 | 152 | | xpath_results = parser.html_fromstring(xpath) |
141 | 153 | | root = xpath_results[card_xpath_name['card']] |
142 | | - | if root[card_xpath_name['card_content']]: |
| 154 | + | if root[card_xpath_name['card_content']] and output['content'] == '': |
143 | 155 | | output['content'] = root[card_xpath_name['card_content']][0].text_content() |
144 | | - | else: |
145 | | - | output['content'] = '' |
146 | 156 | | img = root2[card_xpath_second['card_img']] |
147 | 157 | | name = root2[card_xpath_second['card_name']] |
148 | 158 | | known_as = root2[card_xpath_second['card_known_as']] |
| skipped 9 lines |
158 | 168 | | output['social'] = [] |
159 | 169 | | for piece in social: |
160 | 170 | | output['social'].append(piece.get('href')) |
| 171 | + | output['info'] = list(set(output['info'])) |
161 | 172 | | return output |
162 | 173 | | |
163 | 174 | | @property |
| skipped 26 lines |