mirror of https://github.com/helix-editor/helix
vendor tree-sitter lib
parent
37397ecc6d
commit
782a34941e
@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2018-2024 Max Brunsfeld
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,48 @@
|
||||
#include "alloc.h"
|
||||
#include "tree_sitter/api.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
// Default `malloc` hook. Prints a message to stderr and aborts the process
// when a request for a non-zero number of bytes cannot be satisfied.
static void *ts_malloc_default(size_t size) {
  void *memory = malloc(size);
  if (memory == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
    abort();
  }
  return memory;
}
|
||||
|
||||
// Default `calloc` hook. Returns zero-initialized storage for `count`
// elements of `size` bytes each. Prints a message to stderr and aborts the
// process when a non-empty request fails.
static void *ts_calloc_default(size_t count, size_t size) {
  void *result = calloc(count, size);
  // A request is only non-empty when both factors are non-zero; calloc may
  // return NULL for a zero-byte request, which is not an allocation failure.
  if (count > 0 && size > 0 && !result) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
    abort();
  }
  return result;
}
|
||||
|
||||
// Default `realloc` hook. Prints a message to stderr and aborts the process
// when resizing to a non-zero number of bytes fails.
static void *ts_realloc_default(void *buffer, size_t size) {
  void *resized = realloc(buffer, size);
  if (resized == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
    abort();
  }
  return resized;
}
|
||||
|
||||
// Allow clients to override allocation functions dynamically
|
||||
TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default;
|
||||
TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
|
||||
TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
|
||||
TS_PUBLIC void (*ts_current_free)(void *) = free;
|
||||
|
||||
void ts_set_allocator(
|
||||
void *(*new_malloc)(size_t size),
|
||||
void *(*new_calloc)(size_t count, size_t size),
|
||||
void *(*new_realloc)(void *ptr, size_t size),
|
||||
void (*new_free)(void *ptr)
|
||||
) {
|
||||
ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default;
|
||||
ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default;
|
||||
ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default;
|
||||
ts_current_free = new_free ? new_free : free;
|
||||
}
|
@ -0,0 +1,41 @@
|
||||
#ifndef TREE_SITTER_ALLOC_H_
|
||||
#define TREE_SITTER_ALLOC_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32)
|
||||
#define TS_PUBLIC
|
||||
#else
|
||||
#define TS_PUBLIC __attribute__((visibility("default")))
|
||||
#endif
|
||||
|
||||
TS_PUBLIC extern void *(*ts_current_malloc)(size_t);
|
||||
TS_PUBLIC extern void *(*ts_current_calloc)(size_t, size_t);
|
||||
TS_PUBLIC extern void *(*ts_current_realloc)(void *, size_t);
|
||||
TS_PUBLIC extern void (*ts_current_free)(void *);
|
||||
|
||||
// Allow clients to override allocation functions
|
||||
#ifndef ts_malloc
|
||||
#define ts_malloc ts_current_malloc
|
||||
#endif
|
||||
#ifndef ts_calloc
|
||||
#define ts_calloc ts_current_calloc
|
||||
#endif
|
||||
#ifndef ts_realloc
|
||||
#define ts_realloc ts_current_realloc
|
||||
#endif
|
||||
#ifndef ts_free
|
||||
#define ts_free ts_current_free
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_ALLOC_H_
|
@ -0,0 +1,290 @@
|
||||
#ifndef TREE_SITTER_ARRAY_H_
|
||||
#define TREE_SITTER_ARRAY_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./alloc.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4101)
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||
#endif
|
||||
|
||||
#define Array(T) \
|
||||
struct { \
|
||||
T *contents; \
|
||||
uint32_t size; \
|
||||
uint32_t capacity; \
|
||||
}
|
||||
|
||||
/// Initialize an array.
|
||||
#define array_init(self) \
|
||||
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
|
||||
|
||||
/// Create an empty array.
|
||||
#define array_new() \
|
||||
{ NULL, 0, 0 }
|
||||
|
||||
/// Get a pointer to the element at a given `index` in the array.
|
||||
#define array_get(self, _index) \
|
||||
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
|
||||
|
||||
/// Get a pointer to the first element in the array.
|
||||
#define array_front(self) array_get(self, 0)
|
||||
|
||||
/// Get a pointer to the last element in the array.
|
||||
#define array_back(self) array_get(self, (self)->size - 1)
|
||||
|
||||
/// Clear the array, setting its size to zero. Note that this does not free any
|
||||
/// memory allocated for the array's contents.
|
||||
#define array_clear(self) ((self)->size = 0)
|
||||
|
||||
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
|
||||
/// less than the array's current capacity, this function has no effect.
|
||||
#define array_reserve(self, new_capacity) \
|
||||
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
|
||||
|
||||
/// Free any memory allocated for this array. Note that this does not free any
|
||||
/// memory that the individual elements themselves point to.
|
||||
#define array_delete(self) _array__delete((Array *)(self))
|
||||
|
||||
/// Push a new `element` onto the end of the array.
|
||||
#define array_push(self, element) \
|
||||
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
|
||||
(self)->contents[(self)->size++] = (element))
|
||||
|
||||
/// Append `count` zero-initialized elements to the end of the array, growing
/// its storage when needed. A `count` of zero leaves the array untouched.
#define array_grow_by(self, count) \
  do { \
    if ((count) != 0) { \
      _array__grow((Array *)(self), count, array_elem_size(self)); \
      memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
      (self)->size += (count); \
    } \
  } while (0)
|
||||
|
||||
/// Append all elements from one array to the end of another.
|
||||
#define array_push_all(self, other) \
|
||||
array_extend((self), (other)->size, (other)->contents)
|
||||
|
||||
/// Append `count` elements to the end of the array, reading their values from the
|
||||
/// `contents` pointer.
|
||||
#define array_extend(self, count, contents) \
|
||||
_array__splice( \
|
||||
(Array *)(self), array_elem_size(self), (self)->size, \
|
||||
0, count, contents \
|
||||
)
|
||||
|
||||
/// Remove `old_count` elements from the array starting at the given `index`. At
|
||||
/// the same index, insert `new_count` new elements, reading their values from the
|
||||
/// `new_contents` pointer.
|
||||
#define array_splice(self, _index, old_count, new_count, new_contents) \
|
||||
_array__splice( \
|
||||
(Array *)(self), array_elem_size(self), _index, \
|
||||
old_count, new_count, new_contents \
|
||||
)
|
||||
|
||||
/// Insert one `element` into the array at the given `index`.
|
||||
#define array_insert(self, _index, element) \
|
||||
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
|
||||
|
||||
/// Remove one element from the array at the given `index`.
|
||||
#define array_erase(self, _index) \
|
||||
_array__erase((Array *)(self), array_elem_size(self), _index)
|
||||
|
||||
/// Pop the last element off the array, returning the element by value.
|
||||
#define array_pop(self) ((self)->contents[--(self)->size])
|
||||
|
||||
/// Assign the contents of one array to another, reallocating if necessary.
|
||||
#define array_assign(self, other) \
|
||||
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
|
||||
|
||||
/// Swap one array with another
|
||||
#define array_swap(self, other) \
|
||||
_array__swap((Array *)(self), (Array *)(other))
|
||||
|
||||
/// Get the size in bytes of a single element of the array.
|
||||
#define array_elem_size(self) (sizeof *(self)->contents)
|
||||
|
||||
/// Search a sorted array for a given `needle` value, using the given `compare`
|
||||
/// callback to determine the order.
|
||||
///
|
||||
/// If an existing element is found to be equal to `needle`, then the `index`
|
||||
/// out-parameter is set to the existing value's index, and the `exists`
|
||||
/// out-parameter is set to true. Otherwise, `index` is set to an index where
|
||||
/// `needle` should be inserted in order to preserve the sorting, and `exists`
|
||||
/// is set to false.
|
||||
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
|
||||
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
|
||||
|
||||
/// Search a sorted array for a given `needle` value, using integer comparisons
|
||||
/// of a given struct field (specified with a leading dot) to determine the order.
|
||||
///
|
||||
/// See also `array_search_sorted_with`.
|
||||
#define array_search_sorted_by(self, field, needle, _index, _exists) \
|
||||
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
|
||||
|
||||
/// Insert a given `value` into a sorted array, using the given `compare`
|
||||
/// callback to determine the order.
|
||||
#define array_insert_sorted_with(self, compare, value) \
|
||||
do { \
|
||||
unsigned _index, _exists; \
|
||||
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
|
||||
if (!_exists) array_insert(self, _index, value); \
|
||||
} while (0)
|
||||
|
||||
/// Insert a given `value` into a sorted array, using integer comparisons of
|
||||
/// a given struct field (specified with a leading dot) to determine the order.
|
||||
///
|
||||
/// See also `array_search_sorted_by`.
|
||||
#define array_insert_sorted_by(self, field, value) \
|
||||
do { \
|
||||
unsigned _index, _exists; \
|
||||
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
|
||||
if (!_exists) array_insert(self, _index, value); \
|
||||
} while (0)
|
||||
|
||||
// Private
|
||||
|
||||
typedef Array(void) Array;
|
||||
|
||||
/// This is not what you're looking for, see `array_delete`.
|
||||
static inline void _array__delete(Array *self) {
|
||||
if (self->contents) {
|
||||
ts_free(self->contents);
|
||||
self->contents = NULL;
|
||||
self->size = 0;
|
||||
self->capacity = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_erase`.
|
||||
static inline void _array__erase(Array *self, size_t element_size,
|
||||
uint32_t index) {
|
||||
assert(index < self->size);
|
||||
char *contents = (char *)self->contents;
|
||||
memmove(contents + index * element_size, contents + (index + 1) * element_size,
|
||||
(self->size - index - 1) * element_size);
|
||||
self->size--;
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_reserve`.
|
||||
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
|
||||
if (new_capacity > self->capacity) {
|
||||
if (self->contents) {
|
||||
self->contents = ts_realloc(self->contents, new_capacity * element_size);
|
||||
} else {
|
||||
self->contents = ts_malloc(new_capacity * element_size);
|
||||
}
|
||||
self->capacity = new_capacity;
|
||||
}
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_assign`.
|
||||
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
|
||||
_array__reserve(self, element_size, other->size);
|
||||
self->size = other->size;
|
||||
memcpy(self->contents, other->contents, self->size * element_size);
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_swap`.
|
||||
static inline void _array__swap(Array *self, Array *other) {
|
||||
Array swap = *other;
|
||||
*other = *self;
|
||||
*self = swap;
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
|
||||
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
|
||||
uint32_t new_size = self->size + count;
|
||||
if (new_size > self->capacity) {
|
||||
uint32_t new_capacity = self->capacity * 2;
|
||||
if (new_capacity < 8) new_capacity = 8;
|
||||
if (new_capacity < new_size) new_capacity = new_size;
|
||||
_array__reserve(self, element_size, new_capacity);
|
||||
}
|
||||
}
|
||||
|
||||
/// This is not what you're looking for, see `array_splice`.
|
||||
static inline void _array__splice(Array *self, size_t element_size,
|
||||
uint32_t index, uint32_t old_count,
|
||||
uint32_t new_count, const void *elements) {
|
||||
uint32_t new_size = self->size + new_count - old_count;
|
||||
uint32_t old_end = index + old_count;
|
||||
uint32_t new_end = index + new_count;
|
||||
assert(old_end <= self->size);
|
||||
|
||||
_array__reserve(self, element_size, new_size);
|
||||
|
||||
char *contents = (char *)self->contents;
|
||||
if (self->size > old_end) {
|
||||
memmove(
|
||||
contents + new_end * element_size,
|
||||
contents + old_end * element_size,
|
||||
(self->size - old_end) * element_size
|
||||
);
|
||||
}
|
||||
if (new_count > 0) {
|
||||
if (elements) {
|
||||
memcpy(
|
||||
(contents + index * element_size),
|
||||
elements,
|
||||
new_count * element_size
|
||||
);
|
||||
} else {
|
||||
memset(
|
||||
(contents + index * element_size),
|
||||
0,
|
||||
new_count * element_size
|
||||
);
|
||||
}
|
||||
}
|
||||
self->size += new_count - old_count;
|
||||
}
|
||||
|
||||
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
|
||||
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
|
||||
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
|
||||
do { \
|
||||
*(_index) = start; \
|
||||
*(_exists) = false; \
|
||||
uint32_t size = (self)->size - *(_index); \
|
||||
if (size == 0) break; \
|
||||
int comparison; \
|
||||
while (size > 1) { \
|
||||
uint32_t half_size = size / 2; \
|
||||
uint32_t mid_index = *(_index) + half_size; \
|
||||
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
|
||||
if (comparison <= 0) *(_index) = mid_index; \
|
||||
size -= half_size; \
|
||||
} \
|
||||
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
|
||||
if (comparison == 0) *(_exists) = true; \
|
||||
else if (comparison < 0) *(_index) += 1; \
|
||||
} while (0)
|
||||
|
||||
/// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
|
||||
/// parameter by reference in order to work with the generic sorting function above.
|
||||
#define _compare_int(a, b) ((int)*(a) - (int)(b))
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(default : 4101)
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_ARRAY_H_
|
@ -0,0 +1,68 @@
|
||||
#ifndef TREE_SITTER_ATOMIC_H_
|
||||
#define TREE_SITTER_ATOMIC_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __TINYC__
|
||||
|
||||
// TinyC build: a plain read through the volatile pointer.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t value = *p;
  return value;
}
|
||||
|
||||
// TinyC build: plain increment, returning the value read back afterwards.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  *p = *p + 1;
  return *p;
}
|
||||
|
||||
// TinyC build: plain decrement, returning the value read back afterwards.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  *p = *p - 1;
  return *p;
}
|
||||
|
||||
#elif defined(_WIN32)
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
// Windows build: a plain read through the volatile pointer.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t value = *p;
  return value;
}
|
||||
|
||||
// Windows build: increments through InterlockedIncrement and returns its
// result.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  long volatile *counter = (long volatile *)p;
  return InterlockedIncrement(counter);
}
|
||||
|
||||
// Windows build: decrements through InterlockedDecrement and returns its
// result.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  long volatile *counter = (long volatile *)p;
  return InterlockedDecrement(counter);
}
|
||||
|
||||
#else
|
||||
|
||||
// Reads `*p` with the `__atomic` builtins (relaxed ordering) when the
// compiler defines them, otherwise with the legacy `__sync` builtin.
static inline size_t atomic_load(const volatile size_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_fetch_and_add((volatile size_t *)p, 0);
#else
  return __atomic_load_n(p, __ATOMIC_RELAXED);
#endif
}
|
||||
|
||||
// Atomically adds one to `*p` and returns the new value.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_add_and_fetch(p, 1U);
#else
  return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST);
#endif
}
|
||||
|
||||
// Atomically subtracts one from `*p` and returns the new value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_sub_and_fetch(p, 1U);
#else
  return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST);
#endif
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_ATOMIC_H_
|
@ -0,0 +1,146 @@
|
||||
#ifndef TREE_SITTER_CLOCK_H_
|
||||
#define TREE_SITTER_CLOCK_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint64_t TSDuration;
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
// Windows:
|
||||
// * Represent a time as a performance counter value.
|
||||
// * Represent a duration as a number of performance counter ticks.
|
||||
|
||||
#include <windows.h>
|
||||
typedef uint64_t TSClock;
|
||||
|
||||
static inline TSDuration duration_from_micros(uint64_t micros) {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
return micros * (uint64_t)frequency.QuadPart / 1000000;
|
||||
}
|
||||
|
||||
// Converts performance-counter ticks into microseconds, using the frequency
// reported by QueryPerformanceFrequency.
static inline uint64_t duration_to_micros(TSDuration self) {
  LARGE_INTEGER counter_frequency;
  QueryPerformanceFrequency(&counter_frequency);
  uint64_t ticks_per_second = (uint64_t)counter_frequency.QuadPart;
  return self * 1000000 / ticks_per_second;
}
|
||||
|
||||
// The null clock value is zero.
static inline TSClock clock_null(void) {
  TSClock none = 0;
  return none;
}
|
||||
|
||||
static inline TSClock clock_now(void) {
|
||||
LARGE_INTEGER result;
|
||||
QueryPerformanceCounter(&result);
|
||||
return (uint64_t)result.QuadPart;
|
||||
}
|
||||
|
||||
// Returns the clock value that lies `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base + duration;
  return deadline;
}
|
||||
|
||||
// True when `self` is the zero (null) clock value.
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}
|
||||
|
||||
// True when `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}
|
||||
|
||||
#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
|
||||
|
||||
// POSIX with monotonic clock support (Linux)
|
||||
// * Represent a time as a monotonic (seconds, nanoseconds) pair.
|
||||
// * Represent a duration as a number of microseconds.
|
||||
//
|
||||
// On these platforms, parse timeouts will correspond accurately to
|
||||
// real time, regardless of what other processes are running.
|
||||
|
||||
#include <time.h>
|
||||
typedef struct timespec TSClock;
|
||||
|
||||
// Durations are already stored in microseconds here, so no scaling is needed.
static inline TSDuration duration_from_micros(uint64_t micros) {
  TSDuration duration = micros;
  return duration;
}
|
||||
|
||||
// Durations are already stored in microseconds here, so no scaling is needed.
static inline uint64_t duration_to_micros(TSDuration self) {
  uint64_t micros = self;
  return micros;
}
|
||||
|
||||
static inline TSClock clock_now(void) {
|
||||
TSClock result;
|
||||
clock_gettime(CLOCK_MONOTONIC, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
// The null clock value has both seconds and nanoseconds set to zero.
static inline TSClock clock_null(void) {
  TSClock none = {0, 0};
  return none;
}
|
||||
|
||||
// Returns the time that lies `duration` microseconds after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base;
  deadline.tv_sec += duration / 1000000;
  deadline.tv_nsec += (duration % 1000000) * 1000;
  // Carry a full second out of the nanosecond field when it overflows.
  if (deadline.tv_nsec >= 1000000000) {
    deadline.tv_sec++;
    deadline.tv_nsec -= 1000000000;
  }
  return deadline;
}
|
||||
|
||||
// True when `self` equals the value produced by `clock_null()`, i.e. both the
// seconds and the nanoseconds fields are zero. Checking only the seconds
// would misreport a real timestamp with a zero seconds field as null.
static inline bool clock_is_null(TSClock self) {
  return self.tv_sec == 0 && self.tv_nsec == 0;
}
|
||||
|
||||
// True when `self` is strictly later than `other`: seconds are compared
// first and nanoseconds break ties.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  if (self.tv_sec != other.tv_sec) {
    return self.tv_sec > other.tv_sec;
  }
  return self.tv_nsec > other.tv_nsec;
}
|
||||
|
||||
#else
|
||||
|
||||
// macOS or POSIX without monotonic clock support
|
||||
// * Represent a time as a process clock value.
|
||||
// * Represent a duration as a number of process clock ticks.
|
||||
//
|
||||
// On these platforms, parse timeouts may be affected by other processes,
|
||||
// which is not ideal, but is better than using a non-monotonic time API
|
||||
// like `gettimeofday`.
|
||||
|
||||
#include <time.h>
|
||||
typedef uint64_t TSClock;
|
||||
|
||||
// Converts microseconds into `clock()` ticks using CLOCKS_PER_SEC.
static inline TSDuration duration_from_micros(uint64_t micros) {
  uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return micros * ticks_per_second / 1000000;
}
|
||||
|
||||
// Converts `clock()` ticks into microseconds using CLOCKS_PER_SEC.
static inline uint64_t duration_to_micros(TSDuration self) {
  uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return self * 1000000 / ticks_per_second;
}
|
||||
|
||||
// The null clock value is zero.
static inline TSClock clock_null(void) {
  TSClock none = 0;
  return none;
}
|
||||
|
||||
// Returns the current `clock()` value widened to 64 bits.
static inline TSClock clock_now(void) {
  uint64_t ticks = (uint64_t)clock();
  return ticks;
}
|
||||
|
||||
// Returns the clock value that lies `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base + duration;
  return deadline;
}
|
||||
|
||||
// True when `self` is the zero (null) clock value.
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}
|
||||
|
||||
// True when `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_CLOCK_H_
|
@ -0,0 +1,11 @@
|
||||
#ifndef TREE_SITTER_ERROR_COSTS_H_
|
||||
#define TREE_SITTER_ERROR_COSTS_H_
|
||||
|
||||
#define ERROR_STATE 0
|
||||
#define ERROR_COST_PER_RECOVERY 500
|
||||
#define ERROR_COST_PER_MISSING_TREE 110
|
||||
#define ERROR_COST_PER_SKIPPED_TREE 100
|
||||
#define ERROR_COST_PER_SKIPPED_LINE 30
|
||||
#define ERROR_COST_PER_SKIPPED_CHAR 1
|
||||
|
||||
#endif
|
@ -0,0 +1,501 @@
|
||||
#include "./get_changed_ranges.h"
|
||||
#include "./subtree.h"
|
||||
#include "./language.h"
|
||||
#include "./error_costs.h"
|
||||
#include "./tree_cursor.h"
|
||||
#include <assert.h>
|
||||
|
||||
// #define DEBUG_GET_CHANGED_RANGES
|
||||
|
||||
static void ts_range_array_add(
|
||||
TSRangeArray *self,
|
||||
Length start,
|
||||
Length end
|
||||
) {
|
||||
if (self->size > 0) {
|
||||
TSRange *last_range = array_back(self);
|
||||
if (start.bytes <= last_range->end_byte) {
|
||||
last_range->end_byte = end.bytes;
|
||||
last_range->end_point = end.extent;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (start.bytes < end.bytes) {
|
||||
TSRange range = { start.extent, end.extent, start.bytes, end.bytes };
|
||||
array_push(self, range);
|
||||
}
|
||||
}
|
||||
|
||||
bool ts_range_array_intersects(
|
||||
const TSRangeArray *self,
|
||||
unsigned start_index,
|
||||
uint32_t start_byte,
|
||||
uint32_t end_byte
|
||||
) {
|
||||
for (unsigned i = start_index; i < self->size; i++) {
|
||||
TSRange *range = &self->contents[i];
|
||||
if (range->end_byte > start_byte) {
|
||||
if (range->start_byte >= end_byte) break;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void ts_range_array_get_changed_ranges(
|
||||
const TSRange *old_ranges, unsigned old_range_count,
|
||||
const TSRange *new_ranges, unsigned new_range_count,
|
||||
TSRangeArray *differences
|
||||
) {
|
||||
unsigned new_index = 0;
|
||||
unsigned old_index = 0;
|
||||
Length current_position = length_zero();
|
||||
bool in_old_range = false;
|
||||
bool in_new_range = false;
|
||||
|
||||
while (old_index < old_range_count || new_index < new_range_count) {
|
||||
const TSRange *old_range = &old_ranges[old_index];
|
||||
const TSRange *new_range = &new_ranges[new_index];
|
||||
|
||||
Length next_old_position;
|
||||
if (in_old_range) {
|
||||
next_old_position = (Length) {old_range->end_byte, old_range->end_point};
|
||||
} else if (old_index < old_range_count) {
|
||||
next_old_position = (Length) {old_range->start_byte, old_range->start_point};
|
||||
} else {
|
||||
next_old_position = LENGTH_MAX;
|
||||
}
|
||||
|
||||
Length next_new_position;
|
||||
if (in_new_range) {
|
||||
next_new_position = (Length) {new_range->end_byte, new_range->end_point};
|
||||
} else if (new_index < new_range_count) {
|
||||
next_new_position = (Length) {new_range->start_byte, new_range->start_point};
|
||||
} else {
|
||||
next_new_position = LENGTH_MAX;
|
||||
}
|
||||
|
||||
if (next_old_position.bytes < next_new_position.bytes) {
|
||||
if (in_old_range != in_new_range) {
|
||||
ts_range_array_add(differences, current_position, next_old_position);
|
||||
}
|
||||
if (in_old_range) old_index++;
|
||||
current_position = next_old_position;
|
||||
in_old_range = !in_old_range;
|
||||
} else if (next_new_position.bytes < next_old_position.bytes) {
|
||||
if (in_old_range != in_new_range) {
|
||||
ts_range_array_add(differences, current_position, next_new_position);
|
||||
}
|
||||
if (in_new_range) new_index++;
|
||||
current_position = next_new_position;
|
||||
in_new_range = !in_new_range;
|
||||
} else {
|
||||
if (in_old_range != in_new_range) {
|
||||
ts_range_array_add(differences, current_position, next_new_position);
|
||||
}
|
||||
if (in_old_range) old_index++;
|
||||
if (in_new_range) new_index++;
|
||||
in_old_range = !in_old_range;
|
||||
in_new_range = !in_new_range;
|
||||
current_position = next_new_position;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
TreeCursor cursor;
|
||||
const TSLanguage *language;
|
||||
unsigned visible_depth;
|
||||
bool in_padding;
|
||||
} Iterator;
|
||||
|
||||
static Iterator iterator_new(
|
||||
TreeCursor *cursor,
|
||||
const Subtree *tree,
|
||||
const TSLanguage *language
|
||||
) {
|
||||
array_clear(&cursor->stack);
|
||||
array_push(&cursor->stack, ((TreeCursorEntry) {
|
||||
.subtree = tree,
|
||||
.position = length_zero(),
|
||||
.child_index = 0,
|
||||
.structural_child_index = 0,
|
||||
}));
|
||||
return (Iterator) {
|
||||
.cursor = *cursor,
|
||||
.language = language,
|
||||
.visible_depth = 1,
|
||||
.in_padding = false,
|
||||
};
|
||||
}
|
||||
|
||||
// The iterator is finished once its cursor stack is empty.
static bool iterator_done(Iterator *self) {
  return !(self->cursor.stack.size > 0);
}
|
||||
|
||||
// Start of the current span: the top entry's position while in its padding,
// otherwise that position plus the subtree's padding.
static Length iterator_start_position(Iterator *self) {
  TreeCursorEntry top = *array_back(&self->cursor.stack);
  if (self->in_padding) return top.position;
  return length_add(top.position, ts_subtree_padding(*top.subtree));
}
|
||||
|
||||
// End of the current span: the end of the top entry's padding while in its
// padding, otherwise the end of the subtree itself.
static Length iterator_end_position(Iterator *self) {
  TreeCursorEntry top = *array_back(&self->cursor.stack);
  Length padding_end = length_add(top.position, ts_subtree_padding(*top.subtree));
  return self->in_padding
    ? padding_end
    : length_add(padding_end, ts_subtree_size(*top.subtree));
}
|
||||
|
||||
static bool iterator_tree_is_visible(const Iterator *self) {
|
||||
TreeCursorEntry entry = *array_back(&self->cursor.stack);
|
||||
if (ts_subtree_visible(*entry.subtree)) return true;
|
||||
if (self->cursor.stack.size > 1) {
|
||||
Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
|
||||
return ts_language_alias_at(
|
||||
self->language,
|
||||
parent.ptr->production_id,
|
||||
entry.structural_child_index
|
||||
) != 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void iterator_get_visible_state(
|
||||
const Iterator *self,
|
||||
Subtree *tree,
|
||||
TSSymbol *alias_symbol,
|
||||
uint32_t *start_byte
|
||||
) {
|
||||
uint32_t i = self->cursor.stack.size - 1;
|
||||
|
||||
if (self->in_padding) {
|
||||
if (i == 0) return;
|
||||
i--;
|
||||
}
|
||||
|
||||
for (; i + 1 > 0; i--) {
|
||||
TreeCursorEntry entry = self->cursor.stack.contents[i];
|
||||
|
||||
if (i > 0) {
|
||||
const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
|
||||
*alias_symbol = ts_language_alias_at(
|
||||
self->language,
|
||||
parent->ptr->production_id,
|
||||
entry.structural_child_index
|
||||
);
|
||||
}
|
||||
|
||||
if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
|
||||
*tree = *entry.subtree;
|
||||
*start_byte = entry.position.bytes;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pop the top entry off the cursor stack, updating the visible depth and the
// padding flag. Does nothing when the iterator is already done.
static void iterator_ascend(Iterator *self) {
  if (iterator_done(self)) return;

  if (iterator_tree_is_visible(self) && !self->in_padding) {
    self->visible_depth--;
  }

  TreeCursorEntry *top = array_back(&self->cursor.stack);
  if (top->child_index > 0) {
    self->in_padding = false;
  }

  self->cursor.stack.size--;
}
|
||||
|
||||
// Descend from the current entry to the first child that ends after
// `goal_position`, passing through hidden children, until a visible one is
// reached. Returns true when a visible child was entered and false when in
// padding or when no such child exists.
static bool iterator_descend(Iterator *self, uint32_t goal_position) {
  if (self->in_padding) return false;

  for (;;) {
    TreeCursorEntry parent_entry = *array_back(&self->cursor.stack);
    Length position = parent_entry.position;
    uint32_t structural_child_index = 0;
    uint32_t child_count = ts_subtree_child_count(*parent_entry.subtree);
    bool entered_hidden_child = false;

    for (uint32_t i = 0; i < child_count; i++) {
      const Subtree *child = &ts_subtree_children(*parent_entry.subtree)[i];
      Length child_left = length_add(position, ts_subtree_padding(*child));
      Length child_right = length_add(child_left, ts_subtree_size(*child));

      // This child ends at or before the goal: step over it.
      if (child_right.bytes <= goal_position) {
        position = child_right;
        if (!ts_subtree_extra(*child)) structural_child_index++;
        continue;
      }

      array_push(&self->cursor.stack, ((TreeCursorEntry) {
        .subtree = child,
        .position = position,
        .child_index = i,
        .structural_child_index = structural_child_index,
      }));

      if (!iterator_tree_is_visible(self)) {
        // Hidden child: keep descending from it on the next pass.
        entered_hidden_child = true;
        break;
      }

      if (child_left.bytes > goal_position) {
        self->in_padding = true;
      } else {
        self->visible_depth++;
      }
      return true;
    }

    if (!entered_hidden_child) return false;
  }
}
|
||||
|
||||
// Move to the next span. When in padding, this steps from the padding into
// the node itself. Otherwise it pops entries until one has a following
// sibling and moves onto that sibling, descending further when the sibling
// is not visible.
static void iterator_advance(Iterator *self) {
  if (self->in_padding) {
    self->in_padding = false;
    if (iterator_tree_is_visible(self)) {
      self->visible_depth++;
    } else {
      iterator_descend(self, 0);
    }
    return;
  }

  for (;;) {
    if (iterator_tree_is_visible(self)) self->visible_depth--;
    TreeCursorEntry finished = array_pop(&self->cursor.stack);
    if (iterator_done(self)) return;

    const Subtree *parent = array_back(&self->cursor.stack)->subtree;
    uint32_t next_index = finished.child_index + 1;
    // No following sibling at this level: keep popping.
    if (next_index >= ts_subtree_child_count(*parent)) continue;

    Length position = length_add(finished.position, ts_subtree_total_size(*finished.subtree));
    uint32_t structural_child_index = finished.structural_child_index;
    if (!ts_subtree_extra(*finished.subtree)) structural_child_index++;
    const Subtree *next_child = &ts_subtree_children(*parent)[next_index];

    array_push(&self->cursor.stack, ((TreeCursorEntry) {
      .subtree = next_child,
      .position = position,
      .child_index = next_index,
      .structural_child_index = structural_child_index,
    }));

    if (!iterator_tree_is_visible(self)) {
      iterator_descend(self, 0);
    } else if (ts_subtree_padding(*next_child).bytes > 0) {
      self->in_padding = true;
    } else {
      self->visible_depth++;
    }
    return;
  }
}
|
||||
|
||||
// Result of comparing the current positions of two iterators
// (see iterator_compare):
//   IteratorDiffers   - the subtrees are different; a change is recorded.
//   IteratorMayDiffer - the subtrees might differ internally; the caller
//                       descends into both to find out.
//   IteratorMatches   - the subtrees are treated as identical and skipped.
typedef enum {
|
||||
IteratorDiffers,
|
||||
IteratorMayDiffer,
|
||||
IteratorMatches,
|
||||
} IteratorComparison;
|
||||
|
||||
static IteratorComparison iterator_compare(
|
||||
const Iterator *old_iter,
|
||||
const Iterator *new_iter
|
||||
) {
|
||||
Subtree old_tree = NULL_SUBTREE;
|
||||
Subtree new_tree = NULL_SUBTREE;
|
||||
uint32_t old_start = 0;
|
||||
uint32_t new_start = 0;
|
||||
TSSymbol old_alias_symbol = 0;
|
||||
TSSymbol new_alias_symbol = 0;
|
||||
iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
|
||||
iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);
|
||||
|
||||
if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
|
||||
if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;
|
||||
|
||||
if (
|
||||
old_alias_symbol == new_alias_symbol &&
|
||||
ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree)
|
||||
) {
|
||||
if (old_start == new_start &&
|
||||
!ts_subtree_has_changes(old_tree) &&
|
||||
ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
|
||||
ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
|
||||
ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
|
||||
ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
|
||||
(ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
|
||||
(ts_subtree_parse_state(new_tree) == ERROR_STATE)) {
|
||||
return IteratorMatches;
|
||||
} else {
|
||||
return IteratorMayDiffer;
|
||||
}
|
||||
}
|
||||
|
||||
return IteratorDiffers;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_GET_CHANGED_RANGES
// Debug-only helper: print the symbol name, padding flag, visible depth and
// start/end points (rows shown 1-based) of the iterator's top entry to stdout.
static inline void iterator_print_state(Iterator *self) {
  const TreeCursorEntry *top = array_back(&self->cursor.stack);
  const char *padding_marker = self->in_padding ? "(p)" : " ";
  const char *name = ts_language_symbol_name(
    self->language,
    ts_subtree_symbol(*top->subtree)
  );
  TSPoint start = iterator_start_position(self).extent;
  TSPoint end = iterator_end_position(self).extent;
  printf(
    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
    name, padding_marker,
    self->visible_depth,
    start.row + 1, start.column,
    end.row + 1, end.column
  );
}
#endif
|
||||
|
||||
unsigned ts_subtree_get_changed_ranges(
|
||||
const Subtree *old_tree, const Subtree *new_tree,
|
||||
TreeCursor *cursor1, TreeCursor *cursor2,
|
||||
const TSLanguage *language,
|
||||
const TSRangeArray *included_range_differences,
|
||||
TSRange **ranges
|
||||
) {
|
||||
TSRangeArray results = array_new();
|
||||
|
||||
Iterator old_iter = iterator_new(cursor1, old_tree, language);
|
||||
Iterator new_iter = iterator_new(cursor2, new_tree, language);
|
||||
|
||||
unsigned included_range_difference_index = 0;
|
||||
|
||||
Length position = iterator_start_position(&old_iter);
|
||||
Length next_position = iterator_start_position(&new_iter);
|
||||
if (position.bytes < next_position.bytes) {
|
||||
ts_range_array_add(&results, position, next_position);
|
||||
position = next_position;
|
||||
} else if (position.bytes > next_position.bytes) {
|
||||
ts_range_array_add(&results, next_position, position);
|
||||
next_position = position;
|
||||
}
|
||||
|
||||
do {
|
||||
#ifdef DEBUG_GET_CHANGED_RANGES
|
||||
printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
|
||||
iterator_print_state(&old_iter);
|
||||
printf("\tvs\t");
|
||||
iterator_print_state(&new_iter);
|
||||
puts("");
|
||||
#endif
|
||||
|
||||
// Compare the old and new subtrees.
|
||||
IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
|
||||
|
||||
// Even if the two subtrees appear to be identical, they could differ
|
||||
// internally if they contain a range of text that was previously
|
||||
// excluded from the parse, and is now included, or vice-versa.
|
||||
if (comparison == IteratorMatches && ts_range_array_intersects(
|
||||
included_range_differences,
|
||||
included_range_difference_index,
|
||||
position.bytes,
|
||||
iterator_end_position(&old_iter).bytes
|
||||
)) {
|
||||
comparison = IteratorMayDiffer;
|
||||
}
|
||||
|
||||
bool is_changed = false;
|
||||
switch (comparison) {
|
||||
// If the subtrees are definitely identical, move to the end
|
||||
// of both subtrees.
|
||||
case IteratorMatches:
|
||||
next_position = iterator_end_position(&old_iter);
|
||||
break;
|
||||
|
||||
// If the subtrees might differ internally, descend into both
|
||||
// subtrees, finding the first child that spans the current position.
|
||||
case IteratorMayDiffer:
|
||||
if (iterator_descend(&old_iter, position.bytes)) {
|
||||
if (!iterator_descend(&new_iter, position.bytes)) {
|
||||
is_changed = true;
|
||||
next_position = iterator_end_position(&old_iter);
|
||||
}
|
||||
} else if (iterator_descend(&new_iter, position.bytes)) {
|
||||
is_changed = true;
|
||||
next_position = iterator_end_position(&new_iter);
|
||||
} else {
|
||||
next_position = length_min(
|
||||
iterator_end_position(&old_iter),
|
||||
iterator_end_position(&new_iter)
|
||||
);
|
||||
}
|
||||
break;
|
||||
|
||||
// If the subtrees are different, record a change and then move
|
||||
// to the end of both subtrees.
|
||||
case IteratorDiffers:
|
||||
is_changed = true;
|
||||
next_position = length_min(
|
||||
iterator_end_position(&old_iter),
|
||||
iterator_end_position(&new_iter)
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
// Ensure that both iterators are caught up to the current position.
|
||||
while (
|
||||
!iterator_done(&old_iter) &&
|
||||
iterator_end_position(&old_iter).bytes <= next_position.bytes
|
||||
) iterator_advance(&old_iter);
|
||||
while (
|
||||
!iterator_done(&new_iter) &&
|
||||
iterator_end_position(&new_iter).bytes <= next_position.bytes
|
||||
) iterator_advance(&new_iter);
|
||||
|
||||
// Ensure that both iterators are at the same depth in the tree.
|
||||
while (old_iter.visible_depth > new_iter.visible_depth) {
|
||||
iterator_ascend(&old_iter);
|
||||
}
|
||||
while (new_iter.visible_depth > old_iter.visible_depth) {
|
||||
iterator_ascend(&new_iter);
|
||||
}
|
||||
|
||||
if (is_changed) {
|
||||
#ifdef DEBUG_GET_CHANGED_RANGES
|
||||
printf(
|
||||
" change: [[%u, %u] - [%u, %u]]\n",
|
||||
position.extent.row + 1, position.extent.column,
|
||||
next_position.extent.row + 1, next_position.extent.column
|
||||
);
|
||||
#endif
|
||||
|
||||
ts_range_array_add(&results, position, next_position);
|
||||
}
|
||||
|
||||
position = next_position;
|
||||
|
||||
// Keep track of the current position in the included range differences
|
||||
// array in order to avoid scanning the entire array on each iteration.
|
||||
while (included_range_difference_index < included_range_differences->size) {
|
||||
const TSRange *range = &included_range_differences->contents[
|
||||
included_range_difference_index
|
||||
];
|
||||
if (range->end_byte <= position.bytes) {
|
||||
included_range_difference_index++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
|
||||
|
||||
Length old_size = ts_subtree_total_size(*old_tree);
|
||||
Length new_size = ts_subtree_total_size(*new_tree);
|
||||
if (old_size.bytes < new_size.bytes) {
|
||||
ts_range_array_add(&results, old_size, new_size);
|
||||
} else if (new_size.bytes < old_size.bytes) {
|
||||
ts_range_array_add(&results, new_size, old_size);
|
||||
}
|
||||
|
||||
*cursor1 = old_iter.cursor;
|
||||
*cursor2 = new_iter.cursor;
|
||||
*ranges = results.contents;
|
||||
return results.size;
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
|
||||
#define TREE_SITTER_GET_CHANGED_RANGES_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./tree_cursor.h"
|
||||
#include "./subtree.h"
|
||||
|
||||
typedef Array(TSRange) TSRangeArray;
|
||||
|
||||
void ts_range_array_get_changed_ranges(
|
||||
const TSRange *old_ranges, unsigned old_range_count,
|
||||
const TSRange *new_ranges, unsigned new_range_count,
|
||||
TSRangeArray *differences
|
||||
);
|
||||
|
||||
bool ts_range_array_intersects(
|
||||
const TSRangeArray *self, unsigned start_index,
|
||||
uint32_t start_byte, uint32_t end_byte
|
||||
);
|
||||
|
||||
unsigned ts_subtree_get_changed_ranges(
|
||||
const Subtree *old_tree, const Subtree *new_tree,
|
||||
TreeCursor *cursor1, TreeCursor *cursor2,
|
||||
const TSLanguage *language,
|
||||
const TSRangeArray *included_range_differences,
|
||||
TSRange **ranges
|
||||
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_GET_CHANGED_RANGES_H_
|
@ -0,0 +1,21 @@
|
||||
|
||||
// Determine endian and pointer size based on known defines.
|
||||
// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments
|
||||
// to override this.
|
||||
|
||||
#if !defined(TS_BIG_ENDIAN)
|
||||
#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
|
||||
|| (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__)))
|
||||
#define TS_BIG_ENDIAN 1
|
||||
#else
|
||||
#define TS_BIG_ENDIAN 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(TS_PTR_SIZE)
|
||||
#if UINTPTR_MAX == 0xFFFFFFFF
|
||||
#define TS_PTR_SIZE 32
|
||||
#else
|
||||
#define TS_PTR_SIZE 64
|
||||
#endif
|
||||
#endif
|
@ -0,0 +1,221 @@
|
||||
#include "./language.h"
|
||||
#include "./wasm_store.h"
|
||||
#include "tree_sitter/api.h"
|
||||
#include <string.h>
|
||||
|
||||
// Return `self`, first retaining it when it is a non-NULL wasm language.
const TSLanguage *ts_language_copy(const TSLanguage *self) {
  if (!self) return self;
  if (ts_language_is_wasm(self)) ts_wasm_language_retain(self);
  return self;
}
|
||||
|
||||
// Release `self` when it is a non-NULL wasm language; otherwise do nothing.
void ts_language_delete(const TSLanguage *self) {
  if (!self) return;
  if (ts_language_is_wasm(self)) ts_wasm_language_release(self);
}
|
||||
|
||||
uint32_t ts_language_symbol_count(const TSLanguage *self) {
|
||||
return self->symbol_count + self->alias_count;
|
||||
}
|
||||
|
||||
uint32_t ts_language_state_count(const TSLanguage *self) {
|
||||
return self->state_count;
|
||||
}
|
||||
|
||||
uint32_t ts_language_version(const TSLanguage *self) {
|
||||
return self->version;
|
||||
}
|
||||
|
||||
uint32_t ts_language_field_count(const TSLanguage *self) {
|
||||
return self->field_count;
|
||||
}
|
||||
|
||||
void ts_language_table_entry(
|
||||
const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol,
|
||||
TableEntry *result
|
||||
) {
|
||||
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
|
||||
result->action_count = 0;
|
||||
result->is_reusable = false;
|
||||
result->actions = NULL;
|
||||
} else {
|
||||
assert(symbol < self->token_count);
|
||||
uint32_t action_index = ts_language_lookup(self, state, symbol);
|
||||
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
||||
result->action_count = entry->entry.count;
|
||||
result->is_reusable = entry->entry.reusable;
|
||||
result->actions = (const TSParseAction *)(entry + 1);
|
||||
}
|
||||
}
|
||||
|
||||
TSSymbolMetadata ts_language_symbol_metadata(
|
||||
const TSLanguage *self,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
if (symbol == ts_builtin_sym_error) {
|
||||
return (TSSymbolMetadata) {.visible = true, .named = true};
|
||||
} else if (symbol == ts_builtin_sym_error_repeat) {
|
||||
return (TSSymbolMetadata) {.visible = false, .named = false};
|
||||
} else {
|
||||
return self->symbol_metadata[symbol];
|
||||
}
|
||||
}
|
||||
|
||||
TSSymbol ts_language_public_symbol(
|
||||
const TSLanguage *self,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
if (symbol == ts_builtin_sym_error) return symbol;
|
||||
return self->public_symbol_map[symbol];
|
||||
}
|
||||
|
||||
TSStateId ts_language_next_state(
|
||||
const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
|
||||
return 0;
|
||||
} else if (symbol < self->token_count) {
|
||||
uint32_t count;
|
||||
const TSParseAction *actions = ts_language_actions(self, state, symbol, &count);
|
||||
if (count > 0) {
|
||||
TSParseAction action = actions[count - 1];
|
||||
if (action.type == TSParseActionTypeShift) {
|
||||
return action.shift.extra ? state : action.shift.state;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
return ts_language_lookup(self, state, symbol);
|
||||
}
|
||||
}
|
||||
|
||||
const char *ts_language_symbol_name(
|
||||
const TSLanguage *self,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
if (symbol == ts_builtin_sym_error) {
|
||||
return "ERROR";
|
||||
} else if (symbol == ts_builtin_sym_error_repeat) {
|
||||
return "_ERROR";
|
||||
} else if (symbol < ts_language_symbol_count(self)) {
|
||||
return self->symbol_names[symbol];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
TSSymbol ts_language_symbol_for_name(
|
||||
const TSLanguage *self,
|
||||
const char *string,
|
||||
uint32_t length,
|
||||
bool is_named
|
||||
) {
|
||||
if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error;
|
||||
uint16_t count = (uint16_t)ts_language_symbol_count(self);
|
||||
for (TSSymbol i = 0; i < count; i++) {
|
||||
TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
|
||||
if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
|
||||
const char *symbol_name = self->symbol_names[i];
|
||||
if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
|
||||
return self->public_symbol_map[i];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
TSSymbolType ts_language_symbol_type(
|
||||
const TSLanguage *self,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol);
|
||||
if (metadata.named && metadata.visible) {
|
||||
return TSSymbolTypeRegular;
|
||||
} else if (metadata.visible) {
|
||||
return TSSymbolTypeAnonymous;
|
||||
} else {
|
||||
return TSSymbolTypeAuxiliary;
|
||||
}
|
||||
}
|
||||
|
||||
const char *ts_language_field_name_for_id(
|
||||
const TSLanguage *self,
|
||||
TSFieldId id
|
||||
) {
|
||||
uint32_t count = ts_language_field_count(self);
|
||||
if (count && id <= count) {
|
||||
return self->field_names[id];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
TSFieldId ts_language_field_id_for_name(
|
||||
const TSLanguage *self,
|
||||
const char *name,
|
||||
uint32_t name_length
|
||||
) {
|
||||
uint16_t count = (uint16_t)ts_language_field_count(self);
|
||||
for (TSSymbol i = 1; i < count + 1; i++) {
|
||||
switch (strncmp(name, self->field_names[i], name_length)) {
|
||||
case 0:
|
||||
if (self->field_names[i][name_length] == 0) return i;
|
||||
break;
|
||||
case -1:
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Allocate a lookahead iterator for `state`, initialised by
// ts_language_lookaheads. Returns NULL when `state` is not below the
// language's state count.
TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) {
  if (state >= self->state_count) {
    return NULL;
  }
  LookaheadIterator *lookahead = ts_malloc(sizeof(LookaheadIterator));
  *lookahead = ts_language_lookaheads(self, state);
  return (TSLookaheadIterator *)lookahead;
}
|
||||
|
||||
// Free the iterator's storage with ts_free.
void ts_lookahead_iterator_delete(TSLookaheadIterator *self) {
  void *allocation = self;
  ts_free(allocation);
}
|
||||
|
||||
// Re-initialise the iterator for `state`, keeping its current language.
// Returns false, leaving the iterator untouched, when `state` is not below
// that language's state count.
bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) {
  LookaheadIterator *lookahead = (LookaheadIterator *)self;
  const TSLanguage *language = lookahead->language;
  if (state >= language->state_count) {
    return false;
  }
  *lookahead = ts_language_lookaheads(language, state);
  return true;
}
|
||||
|
||||
const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) {
|
||||
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
|
||||
return iterator->language;
|
||||
}
|
||||
|
||||
// Re-initialise the iterator for `state` of `language`. Returns false,
// leaving the iterator untouched, when `state` is not below the language's
// state count.
bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) {
  if (state >= language->state_count) {
    return false;
  }
  LookaheadIterator *lookahead = (LookaheadIterator *)self;
  *lookahead = ts_language_lookaheads(language, state);
  return true;
}
|
||||
|
||||
// Public wrapper: advance the iterator via ts_lookahead_iterator__next and
// return its result.
bool ts_lookahead_iterator_next(TSLookaheadIterator *self) {
  return ts_lookahead_iterator__next((LookaheadIterator *)self);
}
|
||||
|
||||
TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) {
|
||||
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
|
||||
return iterator->symbol;
|
||||
}
|
||||
|
||||
const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) {
|
||||
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
|
||||
return ts_language_symbol_name(iterator->language, iterator->symbol);
|
||||
}
|
@ -0,0 +1,299 @@
|
||||
#ifndef TREE_SITTER_LANGUAGE_H_
|
||||
#define TREE_SITTER_LANGUAGE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./subtree.h"
|
||||
#include "./parser.h"
|
||||
|
||||
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
|
||||
|
||||
#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14
|
||||
#define LANGUAGE_VERSION_USABLE_VIA_WASM 13
|
||||
|
||||
// The parse actions available for one (state, symbol) pair, as filled in by
// ts_language_table_entry:
//   actions      - first action of the list, or NULL for the error symbols.
//   action_count - number of actions in the list.
//   is_reusable  - the `reusable` flag copied from the parse-action header.
typedef struct {
|
||||
const TSParseAction *actions;
|
||||
uint32_t action_count;
|
||||
bool is_reusable;
|
||||
} TableEntry;
|
||||
|
||||
// Cursor over the symbols that have a table entry in one parse state
// (created by ts_language_lookaheads, stepped by ts_lookahead_iterator__next).
//   language       - the language whose tables are being walked.
//   data           - current position in the parse table / small parse table.
//   group_end      - small states only: end of the current symbol group.
//   state          - NOTE(review): not assigned by the code visible here.
//   table_value    - table value for the current symbol (an action index for
//                    tokens, a successor state for non-terminals).
//   section_index  - NOTE(review): not referenced by the code visible here.
//   group_count    - small states only: number of groups not yet entered.
//   is_small_state - whether the state uses the small parse table.
//   actions        - action list for the current symbol (tokens only).
//   symbol         - current symbol; starts at UINT16_MAX before the first step.
//   next_state     - successor state for non-terminals, 0 for tokens.
//   action_count   - number of entries in `actions` (0 for non-terminals).
typedef struct {
|
||||
const TSLanguage *language;
|
||||
const uint16_t *data;
|
||||
const uint16_t *group_end;
|
||||
TSStateId state;
|
||||
uint16_t table_value;
|
||||
uint16_t section_index;
|
||||
uint16_t group_count;
|
||||
bool is_small_state;
|
||||
|
||||
const TSParseAction *actions;
|
||||
TSSymbol symbol;
|
||||
TSStateId next_state;
|
||||
uint16_t action_count;
|
||||
} LookaheadIterator;
|
||||
|
||||
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
|
||||
|
||||
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
|
||||
|
||||
TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
|
||||
|
||||
TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol);
|
||||
|
||||
// Whether `symbol` is non-zero and below `external_token_count + 1`.
static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
  if (symbol == 0) return false;
  return symbol < self->external_token_count + 1;
}
|
||||
|
||||
static inline const TSParseAction *ts_language_actions(
|
||||
const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol,
|
||||
uint32_t *count
|
||||
) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
*count = entry.action_count;
|
||||
return entry.actions;
|
||||
}
|
||||
|
||||
static inline bool ts_language_has_reduce_action(
|
||||
const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
TableEntry entry;
|
||||
ts_language_table_entry(self, state, symbol, &entry);
|
||||
return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce;
|
||||
}
|
||||
|
||||
// Lookup the table value for a given symbol and state.
|
||||
//
|
||||
// For non-terminal symbols, the table value represents a successor state.
|
||||
// For terminal symbols, it represents an index in the actions table.
|
||||
// For 'large' parse states, this is a direct lookup. For 'small' parse
|
||||
// states, this requires searching through the symbol groups to find
|
||||
// the given symbol.
|
||||
static inline uint16_t ts_language_lookup(
|
||||
const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
if (state >= self->large_state_count) {
|
||||
uint32_t index = self->small_parse_table_map[state - self->large_state_count];
|
||||
const uint16_t *data = &self->small_parse_table[index];
|
||||
uint16_t group_count = *(data++);
|
||||
for (unsigned i = 0; i < group_count; i++) {
|
||||
uint16_t section_value = *(data++);
|
||||
uint16_t symbol_count = *(data++);
|
||||
for (unsigned j = 0; j < symbol_count; j++) {
|
||||
if (*(data++) == symbol) return section_value;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
return self->parse_table[state * self->symbol_count + symbol];
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool ts_language_has_actions(
|
||||
const TSLanguage *self,
|
||||
TSStateId state,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
return ts_language_lookup(self, state, symbol) != 0;
|
||||
}
|
||||
|
||||
// Iterate over all of the symbols that are valid in the given state.
|
||||
//
|
||||
// For 'large' parse states, this just requires iterating through
|
||||
// all possible symbols and checking the parse table for each one.
|
||||
// For 'small' parse states, this exploits the structure of the
|
||||
// table to only visit the valid symbols.
|
||||
static inline LookaheadIterator ts_language_lookaheads(
|
||||
const TSLanguage *self,
|
||||
TSStateId state
|
||||
) {
|
||||
bool is_small_state = state >= self->large_state_count;
|
||||
const uint16_t *data;
|
||||
const uint16_t *group_end = NULL;
|
||||
uint16_t group_count = 0;
|
||||
if (is_small_state) {
|
||||
uint32_t index = self->small_parse_table_map[state - self->large_state_count];
|
||||
data = &self->small_parse_table[index];
|
||||
group_end = data + 1;
|
||||
group_count = *data;
|
||||
} else {
|
||||
data = &self->parse_table[state * self->symbol_count] - 1;
|
||||
}
|
||||
return (LookaheadIterator) {
|
||||
.language = self,
|
||||
.data = data,
|
||||
.group_end = group_end,
|
||||
.group_count = group_count,
|
||||
.is_small_state = is_small_state,
|
||||
.symbol = UINT16_MAX,
|
||||
.next_state = 0,
|
||||
};
|
||||
}
|
||||
|
||||
// Advance the iterator to the next valid symbol. Returns false when the state
// has no more symbols, true otherwise.
static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) {
  if (self->is_small_state) {
    // Small states list their valid symbols explicitly, grouped by table
    // value, so the actions only need refreshing when a new group starts.
    self->data++;
    if (self->data != self->group_end) {
      self->symbol = *self->data;
      return true;
    }

    if (self->group_count == 0) return false;
    self->group_count--;

    // Group header: [table value, symbol count], followed by the symbols.
    const uint16_t *group_header = self->data;
    self->table_value = group_header[0];
    unsigned symbol_count = group_header[1];
    self->data = group_header + 2;
    self->group_end = self->data + symbol_count;
    self->symbol = *self->data;
  } else {
    // Large states: step through every symbol until one has a non-zero
    // table value.
    for (;;) {
      self->data++;
      self->symbol++;
      if (self->symbol >= self->language->symbol_count) return false;
      self->table_value = *self->data;
      if (self->table_value) break;
    }
  }

  // Depending on whether the symbol is terminal or non-terminal, the table
  // value represents either a list of actions or a successor state.
  bool is_terminal = self->symbol < self->language->token_count;
  if (is_terminal) {
    const TSParseActionEntry *header = self->language->parse_actions + self->table_value;
    self->action_count = header->entry.count;
    self->actions = (const TSParseAction *)(header + 1);
    self->next_state = 0;
  } else {
    self->action_count = 0;
    self->next_state = self->table_value;
  }
  return true;
}
|
||||
|
||||
// Whether the state is a "primary state". If this returns false, it indicates that there exists
|
||||
// another state that behaves identically to this one with respect to query analysis.
|
||||
static inline bool ts_language_state_is_primary(
|
||||
const TSLanguage *self,
|
||||
TSStateId state
|
||||
) {
|
||||
if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
|
||||
return state == self->primary_state_ids[state];
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static inline const bool *ts_language_enabled_external_tokens(
|
||||
const TSLanguage *self,
|
||||
unsigned external_scanner_state
|
||||
) {
|
||||
if (external_scanner_state == 0) {
|
||||
return NULL;
|
||||
} else {
|
||||
return self->external_scanner.states + self->external_token_count * external_scanner_state;
|
||||
}
|
||||
}
|
||||
|
||||
static inline const TSSymbol *ts_language_alias_sequence(
|
||||
const TSLanguage *self,
|
||||
uint32_t production_id
|
||||
) {
|
||||
return production_id ?
|
||||
&self->alias_sequences[production_id * self->max_alias_sequence_length] :
|
||||
NULL;
|
||||
}
|
||||
|
||||
static inline TSSymbol ts_language_alias_at(
|
||||
const TSLanguage *self,
|
||||
uint32_t production_id,
|
||||
uint32_t child_index
|
||||
) {
|
||||
return production_id ?
|
||||
self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] :
|
||||
0;
|
||||
}
|
||||
|
||||
static inline void ts_language_field_map(
|
||||
const TSLanguage *self,
|
||||
uint32_t production_id,
|
||||
const TSFieldMapEntry **start,
|
||||
const TSFieldMapEntry **end
|
||||
) {
|
||||
if (self->field_count == 0) {
|
||||
*start = NULL;
|
||||
*end = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
TSFieldMapSlice slice = self->field_map_slices[production_id];
|
||||
*start = &self->field_map_entries[slice.index];
|
||||
*end = &self->field_map_entries[slice.index] + slice.length;
|
||||
}
|
||||
|
||||
static inline void ts_language_aliases_for_symbol(
|
||||
const TSLanguage *self,
|
||||
TSSymbol original_symbol,
|
||||
const TSSymbol **start,
|
||||
const TSSymbol **end
|
||||
) {
|
||||
*start = &self->public_symbol_map[original_symbol];
|
||||
*end = *start + 1;
|
||||
|
||||
unsigned idx = 0;
|
||||
for (;;) {
|
||||
TSSymbol symbol = self->alias_map[idx++];
|
||||
if (symbol == 0 || symbol > original_symbol) break;
|
||||
uint16_t count = self->alias_map[idx++];
|
||||
if (symbol == original_symbol) {
|
||||
*start = &self->alias_map[idx];
|
||||
*end = &self->alias_map[idx + count];
|
||||
break;
|
||||
}
|
||||
idx += count;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void ts_language_write_symbol_as_dot_string(
|
||||
const TSLanguage *self,
|
||||
FILE *f,
|
||||
TSSymbol symbol
|
||||
) {
|
||||
const char *name = ts_language_symbol_name(self, symbol);
|
||||
for (const char *chr = name; *chr; chr++) {
|
||||
switch (*chr) {
|
||||
case '"':
|
||||
case '\\':
|
||||
fputc('\\', f);
|
||||
fputc(*chr, f);
|
||||
break;
|
||||
case '\n':
|
||||
fputs("\\n", f);
|
||||
break;
|
||||
case '\t':
|
||||
fputs("\\t", f);
|
||||
break;
|
||||
default:
|
||||
fputc(*chr, f);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_LANGUAGE_H_
|
@ -0,0 +1,52 @@
|
||||
#ifndef TREE_SITTER_LENGTH_H_
|
||||
#define TREE_SITTER_LENGTH_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include "./point.h"
|
||||
#include "tree_sitter/api.h"
|
||||
|
||||
// A distance or position in a document, tracked in two units at once:
//   bytes  - byte count.
//   extent - the same span as a TSPoint (row / column).
typedef struct {
|
||||
uint32_t bytes;
|
||||
TSPoint extent;
|
||||
} Length;
|
||||
|
||||
// Sentinel for "no length": zero bytes with a non-zero column, which is the
// combination length_is_undefined() tests for.
static const Length LENGTH_UNDEFINED = {0, {0, 1}};
|
||||
// The largest representable length: every component is UINT32_MAX.
static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
|
||||
|
||||
// Whether `length` has zero bytes but a non-zero column.
static inline bool length_is_undefined(Length length) {
  if (length.bytes != 0) return false;
  return length.extent.column != 0;
}
|
||||
|
||||
// Return whichever length has fewer bytes; `len2` wins ties.
static inline Length length_min(Length len1, Length len2) {
  if (len1.bytes < len2.bytes) {
    return len1;
  }
  return len2;
}
|
||||
|
||||
// Add two lengths: bytes are summed and extents are combined with point_add.
static inline Length length_add(Length len1, Length len2) {
  return (Length) {
    .bytes = len1.bytes + len2.bytes,
    .extent = point_add(len1.extent, len2.extent),
  };
}
|
||||
|
||||
// Subtract `len2` from `len1`: bytes are subtracted and extents are combined
// with point_sub.
static inline Length length_sub(Length len1, Length len2) {
  return (Length) {
    .bytes = len1.bytes - len2.bytes,
    .extent = point_sub(len1.extent, len2.extent),
  };
}
|
||||
|
||||
static inline Length length_zero(void) {
|
||||
Length result = {0, {0, 0}};
|
||||
return result;
|
||||
}
|
||||
|
||||
// Subtract `len2` from `len1`, returning the zero length instead when `len1`
// does not have more bytes than `len2`.
static inline Length length_saturating_sub(Length len1, Length len2) {
  if (len1.bytes <= len2.bytes) return length_zero();
  return length_sub(len1, len2);
}
|
||||
|
||||
#endif
|
@ -0,0 +1,419 @@
|
||||
#include <stdio.h>
|
||||
#include "./lexer.h"
|
||||
#include "./subtree.h"
|
||||
#include "./length.h"
|
||||
#include "./unicode.h"
|
||||
|
||||
// Log a lexing event together with the character involved.
//
// Does nothing unless `self->logger.log` is set. The message is formatted into
// `self->debug_buffer`: characters in the range [32, 127) are shown as a
// quoted character, everything else as a decimal number. The buffer is then
// passed to the logger callback with type TSLogTypeLex.
//
// `message` must be a string literal (it is concatenated with the format
// suffix), a `self` with these fields must be in scope, and `character` is
// expanded more than once. The macro expands to a bare `if` block rather than
// a `do { } while (0)` statement.
#define LOG(message, character) \
|
||||
if (self->logger.log) { \
|
||||
snprintf( \
|
||||
self->debug_buffer, \
|
||||
TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
|
||||
32 <= character && character < 127 ? \
|
||||
message " character:'%c'" : \
|
||||
message " character:%d", \
|
||||
character \
|
||||
); \
|
||||
self->logger.log( \
|
||||
self->logger.payload, \
|
||||
TSLogTypeLex, \
|
||||
self->debug_buffer \
|
||||
); \
|
||||
}
|
||||
|
||||
// Code point U+FEFF, the Unicode byte order mark.
static const int32_t BYTE_ORDER_MARK = 0xFEFF;
|
||||
|
||||
// A range covering everything: from byte 0 / point (0, 0) up to UINT32_MAX in
// every component.
// NOTE(review): presumably the included range used when the caller supplies
// none; its use is not visible in this part of the file.
static const TSRange DEFAULT_RANGE = {
|
||||
.start_point = {
|
||||
.row = 0,
|
||||
.column = 0,
|
||||
},
|
||||
.end_point = {
|
||||
.row = UINT32_MAX,
|
||||
.column = UINT32_MAX,
|
||||
},
|
||||
.start_byte = 0,
|
||||
.end_byte = UINT32_MAX
|
||||
};
|
||||
|
||||
// Check if the lexer has reached EOF. This state is stored
|
||||
// by setting the lexer's `current_included_range_index` such that
|
||||
// it has consumed all of its available ranges.
|
||||
static bool ts_lexer__eof(const TSLexer *_self) {
|
||||
Lexer *self = (Lexer *)_self;
|
||||
return self->current_included_range_index == self->included_range_count;
|
||||
}
|
||||
|
||||
// Clear the currently stored chunk of source code, because the lexer's
|
||||
// position has changed.
|
||||
static void ts_lexer__clear_chunk(Lexer *self) {
|
||||
self->chunk = NULL;
|
||||
self->chunk_size = 0;
|
||||
self->chunk_start = 0;
|
||||
}
|
||||
|
||||
// Call the lexer's input callback to obtain a new chunk of source code
|
||||
// for the current position.
|
||||
static void ts_lexer__get_chunk(Lexer *self) {
|
||||
self->chunk_start = self->current_position.bytes;
|
||||
self->chunk = self->input.read(
|
||||
self->input.payload,
|
||||
self->current_position.bytes,
|
||||
self->current_position.extent,
|
||||
&self->chunk_size
|
||||
);
|
||||
if (!self->chunk_size) {
|
||||
self->current_included_range_index = self->included_range_count;
|
||||
self->chunk = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Decode the next unicode character in the current chunk of source code.
|
||||
// This assumes that the lexer has already retrieved a chunk of source
|
||||
// code that spans the current position.
|
||||
static void ts_lexer__get_lookahead(Lexer *self) {
|
||||
uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
|
||||
uint32_t size = self->chunk_size - position_in_chunk;
|
||||
|
||||
if (size == 0) {
|
||||
self->lookahead_size = 1;
|
||||
self->data.lookahead = '\0';
|
||||
return;
|
||||
}
|
||||
|
||||
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
|
||||
UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8
|
||||
? ts_decode_utf8
|
||||
: ts_decode_utf16;
|
||||
|
||||
self->lookahead_size = decode(chunk, size, &self->data.lookahead);
|
||||
|
||||
// If this chunk ended in the middle of a multi-byte character,
|
||||
// try again with a fresh chunk.
|
||||
if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
|
||||
ts_lexer__get_chunk(self);
|
||||
chunk = (const uint8_t *)self->chunk;
|
||||
size = self->chunk_size;
|
||||
self->lookahead_size = decode(chunk, size, &self->data.lookahead);
|
||||
}
|
||||
|
||||
if (self->data.lookahead == TS_DECODE_ERROR) {
|
||||
self->lookahead_size = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Move the lexer to `position`, snapping forward to the start of the next
// included range when the position falls in a gap, or to the EOF state
// when it lies beyond every included range.
static void ts_lexer_goto(Lexer *self, Length position) {
  self->current_position = position;

  // Find the first non-empty included range that ends after the position.
  bool found_included_range = false;
  for (unsigned i = 0; i < self->included_range_count; i++) {
    const TSRange *range = &self->included_ranges[i];
    bool ends_before_position = range->end_byte <= self->current_position.bytes;
    bool is_empty = range->end_byte <= range->start_byte;
    if (ends_before_position || is_empty) continue;

    // The position lies before this range: jump to the range's start.
    if (range->start_byte >= self->current_position.bytes) {
      self->current_position = (Length) {
        .bytes = range->start_byte,
        .extent = range->start_point,
      };
    }

    self->current_included_range_index = i;
    found_included_range = true;
    break;
  }

  // If the given position is beyond any of included ranges, move to the EOF
  // state - past the end of the included ranges.
  if (!found_included_range) {
    const TSRange *last_range = &self->included_ranges[self->included_range_count - 1];
    self->current_included_range_index = self->included_range_count;
    self->current_position = (Length) {
      .bytes = last_range->end_byte,
      .extent = last_range->end_point,
    };
    ts_lexer__clear_chunk(self);
    self->lookahead_size = 1;
    self->data.lookahead = '\0';
    return;
  }

  // Drop the cached chunk if it does not contain the new position.
  if (self->chunk) {
    uint32_t byte = self->current_position.bytes;
    bool before_chunk = byte < self->chunk_start;
    bool after_chunk = byte >= self->chunk_start + self->chunk_size;
    if (before_chunk || after_chunk) {
      ts_lexer__clear_chunk(self);
    }
  }

  // A lookahead size of zero marks the lookahead as not yet decoded.
  self->lookahead_size = 0;
  self->data.lookahead = '\0';
}
|
||||
|
||||
// Intended to be called only from functions that control logging.
|
||||
static void ts_lexer__do_advance(Lexer *self, bool skip) {
|
||||
if (self->lookahead_size) {
|
||||
self->current_position.bytes += self->lookahead_size;
|
||||
if (self->data.lookahead == '\n') {
|
||||
self->current_position.extent.row++;
|
||||
self->current_position.extent.column = 0;
|
||||
} else {
|
||||
self->current_position.extent.column += self->lookahead_size;
|
||||
}
|
||||
}
|
||||
|
||||
const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
|
||||
while (
|
||||
self->current_position.bytes >= current_range->end_byte ||
|
||||
current_range->end_byte == current_range->start_byte
|
||||
) {
|
||||
if (self->current_included_range_index < self->included_range_count) {
|
||||
self->current_included_range_index++;
|
||||
}
|
||||
if (self->current_included_range_index < self->included_range_count) {
|
||||
current_range++;
|
||||
self->current_position = (Length) {
|
||||
current_range->start_byte,
|
||||
current_range->start_point,
|
||||
};
|
||||
} else {
|
||||
current_range = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (skip) self->token_start_position = self->current_position;
|
||||
|
||||
if (current_range) {
|
||||
if (
|
||||
self->current_position.bytes < self->chunk_start ||
|
||||
self->current_position.bytes >= self->chunk_start + self->chunk_size
|
||||
) {
|
||||
ts_lexer__get_chunk(self);
|
||||
}
|
||||
ts_lexer__get_lookahead(self);
|
||||
} else {
|
||||
ts_lexer__clear_chunk(self);
|
||||
self->data.lookahead = '\0';
|
||||
self->lookahead_size = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Advance to the next character in the source code, retrieving a new
|
||||
// chunk of source code if needed.
|
||||
static void ts_lexer__advance(TSLexer *_self, bool skip) {
|
||||
Lexer *self = (Lexer *)_self;
|
||||
if (!self->chunk) return;
|
||||
|
||||
if (skip) {
|
||||
LOG("skip", self->data.lookahead)
|
||||
} else {
|
||||
LOG("consume", self->data.lookahead)
|
||||
}
|
||||
|
||||
ts_lexer__do_advance(self, skip);
|
||||
}
|
||||
|
||||
// Mark that a token match has completed. This can be called multiple
|
||||
// times if a longer match is found later.
|
||||
static void ts_lexer__mark_end(TSLexer *_self) {
|
||||
Lexer *self = (Lexer *)_self;
|
||||
if (!ts_lexer__eof(&self->data)) {
|
||||
// If the lexer is right at the beginning of included range,
|
||||
// then the token should be considered to end at the *end* of the
|
||||
// previous included range, rather than here.
|
||||
TSRange *current_included_range = &self->included_ranges[
|
||||
self->current_included_range_index
|
||||
];
|
||||
if (
|
||||
self->current_included_range_index > 0 &&
|
||||
self->current_position.bytes == current_included_range->start_byte
|
||||
) {
|
||||
TSRange *previous_included_range = current_included_range - 1;
|
||||
self->token_end_position = (Length) {
|
||||
previous_included_range->end_byte,
|
||||
previous_included_range->end_point,
|
||||
};
|
||||
return;
|
||||
}
|
||||
}
|
||||
self->token_end_position = self->current_position;
|
||||
}
|
||||
|
||||
// `TSLexer.get_column` callback: rewind to the start of the current line,
// then advance back to the original byte offset, counting the number of
// advances it takes to get there.
static uint32_t ts_lexer__get_column(TSLexer *_self) {
  Lexer *self = (Lexer *)_self;
  uint32_t target_byte = self->current_position.bytes;

  self->did_get_column = true;

  // Rewind by the stored column value to land on the line's first byte.
  self->current_position.bytes -= self->current_position.extent.column;
  self->current_position.extent.column = 0;

  if (self->current_position.bytes < self->chunk_start) {
    ts_lexer__get_chunk(self);
  }

  if (ts_lexer__eof(_self)) return 0;

  uint32_t advance_count = 0;
  ts_lexer__get_lookahead(self);
  while (self->current_position.bytes < target_byte && self->chunk) {
    advance_count++;
    ts_lexer__do_advance(self, false);
    if (ts_lexer__eof(_self)) break;
  }
  return advance_count;
}
|
||||
|
||||
// Is the lexer at a boundary between two disjoint included ranges of
|
||||
// source code? This is exposed as an API because some languages' external
|
||||
// scanners need to perform custom actions at these boundaries.
|
||||
static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
|
||||
const Lexer *self = (const Lexer *)_self;
|
||||
if (self->current_included_range_index < self->included_range_count) {
|
||||
TSRange *current_range = &self->included_ranges[self->current_included_range_index];
|
||||
return self->current_position.bytes == current_range->start_byte;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Put a lexer into its initial state and install the default included range.
void ts_lexer_init(Lexer *self) {
  // Every member not named below (lookahead, result symbol, chunk fields,
  // positions, logger, included ranges, ...) is implicitly zero/NULL
  // initialized by the compound literal.
  *self = (Lexer) {
    .data = {
      // The lexer's methods are stored as struct fields so that generated
      // parsers can call them without needing to be linked against this
      // library.
      .advance = ts_lexer__advance,
      .mark_end = ts_lexer__mark_end,
      .get_column = ts_lexer__get_column,
      .is_at_included_range_start = ts_lexer__is_at_included_range_start,
      .eof = ts_lexer__eof,
    },
  };

  // Passing no ranges selects the default range.
  ts_lexer_set_included_ranges(self, NULL, 0);
}
|
||||
|
||||
// Release the memory owned by the lexer: its copy of the included ranges.
void ts_lexer_delete(Lexer *self) {
  TSRange *owned_ranges = self->included_ranges;
  ts_free(owned_ranges);
}
|
||||
|
||||
// Install a new input source. The cached chunk came from the previous input,
// so it is discarded, and the lexer is re-seated at its current position.
void ts_lexer_set_input(Lexer *self, TSInput input) {
  ts_lexer__clear_chunk(self);
  self->input = input;
  ts_lexer_goto(self, self->current_position);
}
|
||||
|
||||
// Move the lexer to the given position. This doesn't do any work
|
||||
// if the parser is already at the given position.
|
||||
void ts_lexer_reset(Lexer *self, Length position) {
|
||||
if (position.bytes != self->current_position.bytes) {
|
||||
ts_lexer_goto(self, position);
|
||||
}
|
||||
}
|
||||
|
||||
// Prepare to lex a new token at the current position: reset the per-token
// state and make sure a chunk and a lookahead character are loaded.
void ts_lexer_start(Lexer *self) {
  self->token_start_position = self->current_position;
  self->token_end_position = LENGTH_UNDEFINED;
  self->data.result_symbol = 0;
  self->did_get_column = false;

  if (ts_lexer__eof(&self->data)) return;

  if (self->chunk_size == 0) ts_lexer__get_chunk(self);
  if (self->lookahead_size == 0) ts_lexer__get_lookahead(self);

  // Skip a byte order mark at the very beginning of the input.
  bool at_input_start = self->current_position.bytes == 0;
  if (at_input_start && self->data.lookahead == BYTE_ORDER_MARK) {
    ts_lexer__advance(&self->data, true);
  }
}
|
||||
|
||||
// Finish lexing a token: settle the token's end position and raise
// `*lookahead_end_byte` to cover the bytes the lexer looked at.
void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
  // If the token's end was never marked, it ends at the current position.
  if (length_is_undefined(self->token_end_position)) {
    ts_lexer__mark_end(&self->data);
  }

  // If the token ended at an included range boundary, then its end position
  // will have been reset to the end of the preceding range. Reset the start
  // position to match.
  if (self->token_start_position.bytes > self->token_end_position.bytes) {
    self->token_start_position = self->token_end_position;
  }

  // In order to determine that a byte sequence is invalid UTF8 or UTF16,
  // the character decoding algorithm may have looked at the following byte.
  // Therefore, the next byte *after* the current (invalid) character
  // affects the interpretation of the current character, and the lookahead
  // extends one byte further in that case.
  uint32_t bytes_examined = self->data.lookahead == TS_DECODE_ERROR ? 2 : 1;
  uint32_t end_byte = self->current_position.bytes + bytes_examined;

  if (*lookahead_end_byte < end_byte) {
    *lookahead_end_byte = end_byte;
  }
}
|
||||
|
||||
// Consume characters until the lexer no longer holds a chunk of source code.
void ts_lexer_advance_to_end(Lexer *self) {
  for (; self->chunk != NULL; ) {
    ts_lexer__advance(&self->data, false);
  }
}
|
||||
|
||||
// Library-facing wrapper around the `mark_end` lexer callback.
void ts_lexer_mark_end(Lexer *self) {
  TSLexer *callbacks = &self->data;
  ts_lexer__mark_end(callbacks);
}
|
||||
|
||||
bool ts_lexer_set_included_ranges(
|
||||
Lexer *self,
|
||||
const TSRange *ranges,
|
||||
uint32_t count
|
||||
) {
|
||||
if (count == 0 || !ranges) {
|
||||
ranges = &DEFAULT_RANGE;
|
||||
count = 1;
|
||||
} else {
|
||||
uint32_t previous_byte = 0;
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
const TSRange *range = &ranges[i];
|
||||
if (
|
||||
range->start_byte < previous_byte ||
|
||||
range->end_byte < range->start_byte
|
||||
) return false;
|
||||
previous_byte = range->end_byte;
|
||||
}
|
||||
}
|
||||
|
||||
size_t size = count * sizeof(TSRange);
|
||||
self->included_ranges = ts_realloc(self->included_ranges, size);
|
||||
memcpy(self->included_ranges, ranges, size);
|
||||
self->included_range_count = count;
|
||||
ts_lexer_goto(self, self->current_position);
|
||||
return true;
|
||||
}
|
||||
|
||||
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
|
||||
*count = self->included_range_count;
|
||||
return self->included_ranges;
|
||||
}
|
||||
|
||||
#undef LOG
|
@ -0,0 +1,49 @@
|
||||
#ifndef TREE_SITTER_LEXER_H_
|
||||
#define TREE_SITTER_LEXER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./length.h"
|
||||
#include "./subtree.h"
|
||||
#include "tree_sitter/api.h"
|
||||
#include "./parser.h"
|
||||
|
||||
typedef struct {
|
||||
TSLexer data;
|
||||
Length current_position;
|
||||
Length token_start_position;
|
||||
Length token_end_position;
|
||||
|
||||
TSRange *included_ranges;
|
||||
const char *chunk;
|
||||
TSInput input;
|
||||
TSLogger logger;
|
||||
|
||||
uint32_t included_range_count;
|
||||
uint32_t current_included_range_index;
|
||||
uint32_t chunk_start;
|
||||
uint32_t chunk_size;
|
||||
uint32_t lookahead_size;
|
||||
bool did_get_column;
|
||||
|
||||
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
|
||||
} Lexer;
|
||||
|
||||
void ts_lexer_init(Lexer *);
|
||||
void ts_lexer_delete(Lexer *);
|
||||
void ts_lexer_set_input(Lexer *, TSInput);
|
||||
void ts_lexer_reset(Lexer *, Length);
|
||||
void ts_lexer_start(Lexer *);
|
||||
void ts_lexer_finish(Lexer *, uint32_t *);
|
||||
void ts_lexer_advance_to_end(Lexer *);
|
||||
void ts_lexer_mark_end(Lexer *);
|
||||
bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
|
||||
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_LEXER_H_
|
@ -0,0 +1,14 @@
|
||||
#define _POSIX_C_SOURCE 200112L
|
||||
|
||||
#include "./alloc.c"
|
||||
#include "./get_changed_ranges.c"
|
||||
#include "./language.c"
|
||||
#include "./lexer.c"
|
||||
#include "./node.c"
|
||||
#include "./parser.c"
|
||||
#include "./query.c"
|
||||
#include "./stack.c"
|
||||
#include "./subtree.c"
|
||||
#include "./tree_cursor.c"
|
||||
#include "./tree.c"
|
||||
#include "./wasm_store.c"
|
@ -0,0 +1,774 @@
|
||||
#include <stdbool.h>
|
||||
#include "./subtree.h"
|
||||
#include "./tree.h"
|
||||
#include "./language.h"
|
||||
|
||||
typedef struct {
|
||||
Subtree parent;
|
||||
const TSTree *tree;
|
||||
Length position;
|
||||
uint32_t child_index;
|
||||
uint32_t structural_child_index;
|
||||
const TSSymbol *alias_sequence;
|
||||
} NodeChildIterator;
|
||||
|
||||
// TSNode - constructors
|
||||
|
||||
TSNode ts_node_new(
|
||||
const TSTree *tree,
|
||||
const Subtree *subtree,
|
||||
Length position,
|
||||
TSSymbol alias
|
||||
) {
|
||||
return (TSNode) {
|
||||
{position.bytes, position.extent.row, position.extent.column, alias},
|
||||
subtree,
|
||||
tree,
|
||||
};
|
||||
}
|
||||
|
||||
static inline TSNode ts_node__null(void) {
|
||||
return ts_node_new(NULL, NULL, length_zero(), 0);
|
||||
}
|
||||
|
||||
// TSNode - accessors
|
||||
|
||||
// Start byte of the node, kept in context slot 0.
uint32_t ts_node_start_byte(TSNode self) {
  uint32_t start_byte = self.context[0];
  return start_byte;
}
|
||||
|
||||
// Start point of the node: row in context slot 1, column in slot 2.
TSPoint ts_node_start_point(TSNode self) {
  TSPoint start = {
    .row = self.context[1],
    .column = self.context[2],
  };
  return start;
}
|
||||
|
||||
static inline uint32_t ts_node__alias(const TSNode *self) {
|
||||
return self->context[3];
|
||||
}
|
||||
|
||||
// The node's `id` is a pointer to its Subtree; return that Subtree by value.
static inline Subtree ts_node__subtree(TSNode self) {
  const Subtree *subtree = (const Subtree *)self.id;
  return *subtree;
}
|
||||
|
||||
// NodeChildIterator
|
||||
|
||||
static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
|
||||
Subtree subtree = ts_node__subtree(*node);
|
||||
if (ts_subtree_child_count(subtree) == 0) {
|
||||
return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL};
|
||||
}
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
node->tree->language,
|
||||
subtree.ptr->production_id
|
||||
);
|
||||
return (NodeChildIterator) {
|
||||
.tree = node->tree,
|
||||
.parent = subtree,
|
||||
.position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
|
||||
.child_index = 0,
|
||||
.structural_child_index = 0,
|
||||
.alias_sequence = alias_sequence,
|
||||
};
|
||||
}
|
||||
|
||||
// True once every child of the parent has been produced. Dereferences the
// parent, so the iterator must have a non-null parent.
static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
  uint32_t total_children = self->parent.ptr->child_count;
  return total_children == self->child_index;
}
|
||||
|
||||
static inline bool ts_node_child_iterator_next(
|
||||
NodeChildIterator *self,
|
||||
TSNode *result
|
||||
) {
|
||||
if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
|
||||
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
|
||||
TSSymbol alias_symbol = 0;
|
||||
if (!ts_subtree_extra(*child)) {
|
||||
if (self->alias_sequence) {
|
||||
alias_symbol = self->alias_sequence[self->structural_child_index];
|
||||
}
|
||||
self->structural_child_index++;
|
||||
}
|
||||
if (self->child_index > 0) {
|
||||
self->position = length_add(self->position, ts_subtree_padding(*child));
|
||||
}
|
||||
*result = ts_node_new(
|
||||
self->tree,
|
||||
child,
|
||||
self->position,
|
||||
alias_symbol
|
||||
);
|
||||
self->position = length_add(self->position, ts_subtree_size(*child));
|
||||
self->child_index++;
|
||||
return true;
|
||||
}
|
||||
|
||||
// TSNode - private
|
||||
|
||||
// Decide whether a node counts for the caller. With `include_anonymous`,
// a node counts if its subtree is visible or it carries an alias. Without
// it, an aliased node counts if the alias symbol is named, and an unaliased
// node counts if its subtree is both visible and named.
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
  Subtree subtree = ts_node__subtree(self);

  if (include_anonymous) {
    return ts_subtree_visible(subtree) || ts_node__alias(&self);
  }

  TSSymbol alias = ts_node__alias(&self);
  if (alias) {
    return ts_language_symbol_metadata(self.tree->language, alias).named;
  }
  return ts_subtree_visible(subtree) && ts_subtree_named(subtree);
}
|
||||
|
||||
static inline uint32_t ts_node__relevant_child_count(
|
||||
TSNode self,
|
||||
bool include_anonymous
|
||||
) {
|
||||
Subtree tree = ts_node__subtree(self);
|
||||
if (ts_subtree_child_count(tree) > 0) {
|
||||
if (include_anonymous) {
|
||||
return tree.ptr->visible_child_count;
|
||||
} else {
|
||||
return tree.ptr->named_child_count;
|
||||
}
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Find the relevant child at `child_index`, descending into non-relevant
// children whose own relevant children occupy part of the index space.
// Returns the null node when the index is out of range.
static inline TSNode ts_node__child(
  TSNode self,
  uint32_t child_index,
  bool include_anonymous
) {
  TSNode current = self;

  for (;;) {
    bool descended = false;
    uint32_t seen = 0;  // relevant children passed so far at this level

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      if (ts_node__is_relevant(child, include_anonymous)) {
        if (seen == child_index) return child;
        seen++;
        continue;
      }

      // A non-relevant child contributes its relevant children. If the
      // target index falls among them, continue the search inside it.
      uint32_t index_within_child = child_index - seen;
      uint32_t nested_count = ts_node__relevant_child_count(child, include_anonymous);
      if (index_within_child < nested_count) {
        current = child;
        child_index = index_within_child;
        descended = true;
        break;
      }
      seen += nested_count;
    }

    if (!descended) break;
  }

  return ts_node__null();
}
|
||||
|
||||
static bool ts_subtree_has_trailing_empty_descendant(
|
||||
Subtree self,
|
||||
Subtree other
|
||||
) {
|
||||
for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) {
|
||||
Subtree child = ts_subtree_children(self)[i];
|
||||
if (ts_subtree_total_bytes(child) > 0) break;
|
||||
if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find the closest relevant node that precedes `self`, starting the search
// at `ts_node_parent(self)` and walking down through hidden nodes. Returns
// the null node if there is none.
static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
  Subtree self_subtree = ts_node__subtree(self);
  bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0;
  uint32_t target_end_byte = ts_node_end_byte(self);

  TSNode ancestor = ts_node_parent(self);
  // Best candidate remembered from a level above the one being scanned.
  TSNode fallback = ts_node__null();
  bool fallback_is_relevant = false;

  while (!ts_node_is_null(ancestor)) {
    // Last child before the target at this level that is relevant itself
    // or has relevant children.
    TSNode candidate = ts_node__null();
    bool candidate_is_relevant = false;
    bool child_contains_target = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&ancestor);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      if (child.id == self.id) break;

      // After `next`, the iterator position is the end of `child`.
      uint32_t child_end_byte = iterator.position.bytes;
      if (
        child_end_byte > target_end_byte ||
        (child_end_byte == target_end_byte && (
          !self_is_empty ||
          ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree)
        ))
      ) {
        child_contains_target = true;
        break;
      }

      if (ts_node__is_relevant(child, include_anonymous)) {
        candidate = child;
        candidate_is_relevant = true;
      } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
        candidate = child;
        candidate_is_relevant = false;
      }
    }

    if (child_contains_target) {
      // Keep this level's candidate as a fallback and search inside the
      // child that holds the target.
      if (!ts_node_is_null(candidate)) {
        fallback = candidate;
        fallback_is_relevant = candidate_is_relevant;
      }
      ancestor = child;
    } else if (candidate_is_relevant) {
      return candidate;
    } else if (!ts_node_is_null(candidate)) {
      ancestor = candidate;
    } else if (fallback_is_relevant) {
      return fallback;
    } else {
      ancestor = fallback;
      fallback = ts_node__null();
      fallback_is_relevant = false;
    }
  }

  return ts_node__null();
}
|
||||
|
||||
// Find the closest relevant node that follows `self`, starting the search
// at `ts_node_parent(self)` and walking down through hidden nodes. Returns
// the null node if there is none.
static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
  uint32_t target_end_byte = ts_node_end_byte(self);

  TSNode ancestor = ts_node_parent(self);
  // Best candidate remembered from a level above the one being scanned.
  TSNode fallback = ts_node__null();
  bool fallback_is_relevant = false;

  while (!ts_node_is_null(ancestor)) {
    TSNode candidate = ts_node__null();
    bool candidate_is_relevant = false;
    TSNode containing_child = ts_node__null();

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&ancestor);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      // Ignore children that end before the target ends.
      if (iterator.position.bytes < target_end_byte) continue;

      if (ts_node_start_byte(child) <= ts_node_start_byte(self)) {
        // A different subtree covering the target: it may contain it.
        if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) {
          containing_child = child;
        }
      } else if (ts_node__is_relevant(child, include_anonymous)) {
        candidate = child;
        candidate_is_relevant = true;
        break;
      } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
        candidate = child;
        candidate_is_relevant = false;
        break;
      }
    }

    if (!ts_node_is_null(containing_child)) {
      // Keep this level's candidate as a fallback and search inside the
      // child that holds the target.
      if (!ts_node_is_null(candidate)) {
        fallback = candidate;
        fallback_is_relevant = candidate_is_relevant;
      }
      ancestor = containing_child;
    } else if (candidate_is_relevant) {
      return candidate;
    } else if (!ts_node_is_null(candidate)) {
      ancestor = candidate;
    } else if (fallback_is_relevant) {
      return fallback;
    } else {
      ancestor = fallback;
    }
  }

  return ts_node__null();
}
|
||||
|
||||
// Find the first relevant child that ends after byte `goal`, descending
// into non-relevant children that themselves have children. Returns the
// null node if no such child exists.
static inline TSNode ts_node__first_child_for_byte(
  TSNode self,
  uint32_t goal,
  bool include_anonymous
) {
  TSNode current = self;

  for (;;) {
    bool descended = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      if (ts_node_end_byte(child) <= goal) continue;

      if (ts_node__is_relevant(child, include_anonymous)) {
        return child;
      }
      if (ts_node_child_count(child) > 0) {
        current = child;
        descended = true;
        break;
      }
    }

    if (!descended) break;
  }

  return ts_node__null();
}
|
||||
|
||||
// Descend from `self` towards the byte range, one spanning child per level,
// and return the deepest relevant node encountered on the way (`self`
// itself if none of the nodes descended into is relevant).
static inline TSNode ts_node__descendant_for_byte_range(
  TSNode self,
  uint32_t range_start,
  uint32_t range_end,
  bool include_anonymous
) {
  TSNode current = self;
  TSNode deepest_relevant = self;

  for (;;) {
    bool descended = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      // After `next`, the iterator position is the end of `child`.
      uint32_t child_end = iterator.position.bytes;

      // The end of this node must extend far enough forward to touch
      // the end of the range and exceed the start of the range.
      if (child_end < range_end || child_end <= range_start) continue;

      // The start of this node must extend far enough backward to
      // touch the start of the range.
      if (range_start < ts_node_start_byte(child)) break;

      current = child;
      if (ts_node__is_relevant(current, include_anonymous)) {
        deepest_relevant = current;
      }
      descended = true;
      break;
    }

    if (!descended) break;
  }

  return deepest_relevant;
}
|
||||
|
||||
static inline TSNode ts_node__descendant_for_point_range(
|
||||
TSNode self,
|
||||
TSPoint range_start,
|
||||
TSPoint range_end,
|
||||
bool include_anonymous
|
||||
) {
|
||||
TSNode node = self;
|
||||
TSNode last_visible_node = self;
|
||||
|
||||
bool did_descend = true;
|
||||
while (did_descend) {
|
||||
did_descend = false;
|
||||
|
||||
TSNode child;
|
||||
NodeChildIterator iterator = ts_node_iterate_children(&node);
|
||||
while (ts_node_child_iterator_next(&iterator, &child)) {
|
||||
TSPoint node_end = iterator.position.extent;
|
||||
|
||||
// The end of this node must extend far enough forward to touch
|
||||
// the end of the range and exceed the start of the range.
|
||||
if (point_lt(node_end, range_end)) continue;
|
||||
if (point_lte(node_end, range_start)) continue;
|
||||
|
||||
// The start of this node must extend far enough backward to
|
||||
// touch the start of the range.
|
||||
if (point_lt(range_start, ts_node_start_point(child))) break;
|
||||
|
||||
node = child;
|
||||
if (ts_node__is_relevant(node, include_anonymous)) {
|
||||
last_visible_node = node;
|
||||
}
|
||||
did_descend = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return last_visible_node;
|
||||
}
|
||||
|
||||
// TSNode - public
|
||||
|
||||
// End byte of the node: its start byte plus the byte size of its subtree.
uint32_t ts_node_end_byte(TSNode self) {
  Length size = ts_subtree_size(ts_node__subtree(self));
  return ts_node_start_byte(self) + size.bytes;
}
|
||||
|
||||
// End point of the node: its start point plus the extent of its subtree.
TSPoint ts_node_end_point(TSNode self) {
  Length size = ts_subtree_size(ts_node__subtree(self));
  return point_add(ts_node_start_point(self), size.extent);
}
|
||||
|
||||
// Public symbol of the node. The alias, when present, takes precedence
// over the subtree's own symbol.
TSSymbol ts_node_symbol(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  TSSymbol symbol = alias ? alias : ts_subtree_symbol(ts_node__subtree(self));
  return ts_language_public_symbol(self.tree->language, symbol);
}
|
||||
|
||||
// Name of the node's symbol. The alias, when present, takes precedence
// over the subtree's own symbol.
const char *ts_node_type(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  TSSymbol symbol = alias ? alias : ts_subtree_symbol(ts_node__subtree(self));
  return ts_language_symbol_name(self.tree->language, symbol);
}
|
||||
|
||||
// Language of the tree that the node belongs to.
const TSLanguage *ts_node_language(TSNode self) {
  const TSTree *owner = self.tree;
  return owner->language;
}
|
||||
|
||||
// Symbol of the node's subtree, ignoring any alias.
TSSymbol ts_node_grammar_symbol(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_symbol(subtree);
}
|
||||
|
||||
// Name of the symbol of the node's subtree, ignoring any alias.
const char *ts_node_grammar_type(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_language_symbol_name(self.tree->language, ts_subtree_symbol(subtree));
}
|
||||
|
||||
// Build the string form of the node by delegating to `ts_subtree_string`,
// passing along the node's alias and whether that alias symbol is visible.
char *ts_node_string(TSNode self) {
  const TSLanguage *language = self.tree->language;
  TSSymbol alias = ts_node__alias(&self);
  bool alias_is_visible = ts_language_symbol_metadata(language, alias).visible;

  return ts_subtree_string(
    ts_node__subtree(self),
    alias,
    alias_is_visible,
    language,
    false
  );
}
|
||||
|
||||
// Two nodes are equal when they share both the same subtree pointer (`id`)
// and the same tree.
bool ts_node_eq(TSNode self, TSNode other) {
  if (self.tree != other.tree) return false;
  return self.id == other.id;
}
|
||||
|
||||
// A node is null when it has no subtree pointer.
bool ts_node_is_null(TSNode self) {
  return !self.id;
}
|
||||
|
||||
// Whether the node's subtree is flagged as extra.
bool ts_node_is_extra(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_extra(subtree);
}
|
||||
|
||||
// Whether the node is named. For an aliased node this is decided by the
// alias symbol's metadata; otherwise by the subtree's own flag.
bool ts_node_is_named(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  if (alias) {
    return ts_language_symbol_metadata(self.tree->language, alias).named;
  }
  return ts_subtree_named(ts_node__subtree(self));
}
|
||||
|
||||
// Whether the node's subtree is flagged as missing.
bool ts_node_is_missing(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_missing(subtree);
}
|
||||
|
||||
// Whether the node's subtree is flagged as having changes.
bool ts_node_has_changes(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_has_changes(subtree);
}
|
||||
|
||||
// Whether the node's subtree carries a non-zero error cost.
bool ts_node_has_error(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_error_cost(subtree) > 0;
}
|
||||
|
||||
// Whether the node's public symbol is the built-in error symbol.
bool ts_node_is_error(TSNode self) {
  return ts_node_symbol(self) == ts_builtin_sym_error;
}
|
||||
|
||||
// Visible descendant count of the node's subtree, plus one.
uint32_t ts_node_descendant_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return 1 + ts_subtree_visible_descendant_count(subtree);
}
|
||||
|
||||
// Parse state recorded on the node's subtree.
TSStateId ts_node_parse_state(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_parse_state(subtree);
}
|
||||
|
||||
// State the language moves to from the node's parse state on the node's
// grammar symbol. A node without a parse state yields TS_TREE_STATE_NONE.
TSStateId ts_node_next_parse_state(TSNode self) {
  TSStateId state = ts_node_parse_state(self);
  if (state == TS_TREE_STATE_NONE) return TS_TREE_STATE_NONE;

  TSSymbol symbol = ts_node_grammar_symbol(self);
  return ts_language_next_state(self.tree->language, state, symbol);
}
|
||||
|
||||
// Find the node's closest relevant ancestor by walking down from the root
// towards the node. Returns the null node when `self` is the root.
TSNode ts_node_parent(TSNode self) {
  TSNode current = ts_tree_root_node(self.tree);
  uint32_t self_end_byte = ts_node_end_byte(self);
  if (current.id == self.id) return ts_node__null();

  TSNode closest_relevant = current;

  for (;;) {
    bool descended = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      // Stop at the node itself, or once children start after it.
      bool starts_after_self = ts_node_start_byte(child) > ts_node_start_byte(self);
      if (starts_after_self || child.id == self.id) break;

      // Descend into a child that reaches the node's end and has children.
      if (iterator.position.bytes >= self_end_byte && ts_node_child_count(child) > 0) {
        current = child;
        if (ts_node__is_relevant(child, true)) {
          closest_relevant = current;
        }
        descended = true;
        break;
      }
    }

    if (!descended) break;
  }

  return closest_relevant;
}
|
||||
|
||||
// Child at `child_index`, counting anonymous nodes as well as named ones.
TSNode ts_node_child(TSNode self, uint32_t child_index) {
  const bool include_anonymous = true;
  return ts_node__child(self, child_index, include_anonymous);
}
|
||||
|
||||
// Child at `child_index`, counting named nodes only.
TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
  const bool include_anonymous = false;
  return ts_node__child(self, child_index, include_anonymous);
}
|
||||
|
||||
// Find the first child of the node associated with `field_id`. Returns the
// null node when the id is zero, the node has no children, or no child
// carries the field.
TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
recur:
  if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();

  const TSFieldMapEntry *entry, *entry_end;
  ts_language_field_map(
    self.tree->language,
    ts_node__subtree(self).ptr->production_id,
    &entry,
    &entry_end
  );
  if (entry == entry_end) return ts_node__null();

  // The field mappings are sorted by their field id. Narrow [entry,
  // entry_end) from both sides down to the entries for the given field id.
  while (entry->field_id < field_id) {
    entry++;
    if (entry == entry_end) return ts_node__null();
  }
  while (entry_end[-1].field_id > field_id) {
    entry_end--;
    if (entry == entry_end) return ts_node__null();
  }

  TSNode child;
  NodeChildIterator iterator = ts_node_iterate_children(&self);
  while (ts_node_child_iterator_next(&iterator, &child)) {
    if (!ts_subtree_extra(ts_node__subtree(child))) {
      // The iterator has already counted this child, hence the -1.
      uint32_t structural_index = iterator.structural_child_index - 1;
      if (structural_index < entry->child_index) continue;

      // Hidden nodes' fields are "inherited" by their visible parent.
      if (entry->inherited) {

        // If this is the *last* possible child node for this field,
        // then perform a tail call to avoid recursion.
        if (entry + 1 == entry_end) {
          self = child;
          goto recur;
        }

        // Otherwise, descend into this child, but if it doesn't contain
        // the field, continue searching subsequent children.
        else {
          TSNode nested = ts_node_child_by_field_id(child, field_id);
          if (nested.id) return nested;
          entry++;
          if (entry == entry_end) return ts_node__null();
        }
      }

      else if (ts_node__is_relevant(child, true)) {
        return child;
      }

      // If the field refers to a hidden node with visible children,
      // return the first visible child.
      else if (ts_node_child_count(child) > 0 ) {
        return ts_node_child(child, 0);
      }

      // Otherwise, continue searching subsequent children.
      else {
        entry++;
        if (entry == entry_end) return ts_node__null();
      }
    }
  }

  return ts_node__null();
}
|
||||
|
||||
// Look up the field name that the language's field map assigns directly
// (i.e. through a non-inherited entry) to the child of `self` at
// `structural_child_index`. Returns NULL when there is no such entry.
static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) {
  const TSFieldMapEntry *entry, *entry_end;
  ts_language_field_map(
    self.tree->language,
    ts_node__subtree(self).ptr->production_id,
    &entry,
    &entry_end
  );

  while (entry != entry_end) {
    bool is_direct_match =
      !entry->inherited && entry->child_index == structural_child_index;
    if (is_direct_match) {
      return self.tree->language->field_names[entry->field_id];
    }
    entry++;
  }
  return NULL;
}
|
||||
|
||||
// Return the field name attached to the child of `self` at `child_index`,
// or NULL if that child has no field name or no such child exists.
//
// Children that are not "relevant" are descended into: when the requested
// index falls inside such a child, its own field name (if any) is remembered
// as the inherited name and the search continues among its children.
const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) {
  TSNode current = self;
  const char *inherited_field_name = NULL;
  bool descended;

  do {
    descended = false;

    TSNode child;
    uint32_t visible_index = 0;
    NodeChildIterator iterator = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      if (ts_node__is_relevant(child, true)) {
        if (visible_index == child_index) {
          // Found the requested child: prefer its own field name and fall
          // back to the one inherited from an enclosing hidden node.
          const char *own_name = ts_node__field_name_from_language(current, iterator.structural_child_index - 1);
          return own_name ? own_name : inherited_field_name;
        }
        visible_index++;
        continue;
      }

      uint32_t index_within_child = child_index - visible_index;
      uint32_t relevant_count = ts_node__relevant_child_count(child, true);
      if (index_within_child >= relevant_count) {
        // The target lies beyond this child; skip all of its relevant
        // children at once.
        visible_index += relevant_count;
        continue;
      }

      // The target lies inside this child: remember its field name and
      // restart the scan one level down.
      const char *own_name = ts_node__field_name_from_language(current, iterator.structural_child_index - 1);
      if (own_name) inherited_field_name = own_name;

      current = child;
      child_index = index_within_child;
      descended = true;
      break;
    }
  } while (descended);

  return NULL;
}
|
||||
|
||||
// Find a child of `self` by field name. The name (`name_length` bytes
// starting at `name`) is first converted to a field id for the node's
// language; the lookup itself is done by ts_node_child_by_field_id.
TSNode ts_node_child_by_field_name(TSNode self, const char *name, uint32_t name_length) {
  TSFieldId field_id =
    ts_language_field_id_for_name(self.tree->language, name, name_length);
  return ts_node_child_by_field_id(self, field_id);
}
|
||||
|
||||
// Number of visible children of `self`; zero when the underlying subtree
// has no children at all.
uint32_t ts_node_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  if (ts_subtree_child_count(subtree) == 0) return 0;
  return subtree.ptr->visible_child_count;
}
|
||||
|
||||
// Number of named children of `self`; zero when the underlying subtree
// has no children at all.
uint32_t ts_node_named_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  if (ts_subtree_child_count(subtree) == 0) return 0;
  return subtree.ptr->named_child_count;
}
|
||||
|
||||
// Next sibling of `self`; calls the internal helper with its flag set to
// `true`.
TSNode ts_node_next_sibling(TSNode self) {
  TSNode sibling = ts_node__next_sibling(self, true);
  return sibling;
}
|
||||
|
||||
// Next named sibling of `self`; calls the internal helper with its flag
// set to `false`.
TSNode ts_node_next_named_sibling(TSNode self) {
  TSNode sibling = ts_node__next_sibling(self, false);
  return sibling;
}
|
||||
|
||||
// Previous sibling of `self`; calls the internal helper with its flag set
// to `true`.
TSNode ts_node_prev_sibling(TSNode self) {
  TSNode sibling = ts_node__prev_sibling(self, true);
  return sibling;
}
|
||||
|
||||
// Previous named sibling of `self`; calls the internal helper with its
// flag set to `false`.
TSNode ts_node_prev_named_sibling(TSNode self) {
  TSNode sibling = ts_node__prev_sibling(self, false);
  return sibling;
}
|
||||
|
||||
// Public wrapper around ts_node__first_child_for_byte for the given byte
// offset, with the helper's flag set to `true`.
TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) {
  TSNode found = ts_node__first_child_for_byte(self, byte, true);
  return found;
}
|
||||
|
||||
// Public wrapper around ts_node__first_child_for_byte for the given byte
// offset, with the helper's flag set to `false` (named variant).
TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) {
  TSNode found = ts_node__first_child_for_byte(self, byte, false);
  return found;
}
|
||||
|
||||
// Public wrapper around ts_node__descendant_for_byte_range for the byte
// range [`start`, `end`], with the helper's flag set to `true`.
TSNode ts_node_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end) {
  TSNode descendant = ts_node__descendant_for_byte_range(self, start, end, true);
  return descendant;
}
|
||||
|
||||
// Public wrapper around ts_node__descendant_for_byte_range for the byte
// range [`start`, `end`], with the helper's flag set to `false` (named
// variant).
TSNode ts_node_named_descendant_for_byte_range(TSNode self, uint32_t start, uint32_t end) {
  TSNode descendant = ts_node__descendant_for_byte_range(self, start, end, false);
  return descendant;
}
|
||||
|
||||
TSNode ts_node_descendant_for_point_range(
|
||||
TSNode self,
|
||||
TSPoint start,
|
||||
TSPoint end
|
||||
) {
|
||||
return ts_node__descendant_for_point_range(self, start, end, true);
|
||||
}
|
||||
|
||||
TSNode ts_node_named_descendant_for_point_range(
|
||||
TSNode self,
|
||||
TSPoint start,
|
||||
TSPoint end
|
||||
) {
|
||||
return ts_node__descendant_for_point_range(self, start, end, false);
|
||||
}
|
||||
|
||||
// Update the start position cached in `self` so that it accounts for `edit`.
//
// - A node starting at or after the old end of the edit is shifted by the
//   difference between the new and old end positions.
// - A node starting strictly inside the edited range is moved to the new
//   end of the edit.
// - A node starting at or before the edit's start is left where it is.
//
// The resulting byte offset, row and column are written to context[0..2].
void ts_node_edit(TSNode *self, const TSInputEdit *edit) {
  uint32_t new_byte = ts_node_start_byte(*self);
  TSPoint new_point = ts_node_start_point(*self);

  bool starts_after_edit = new_byte >= edit->old_end_byte;
  bool starts_inside_edit = !starts_after_edit && new_byte > edit->start_byte;

  if (starts_after_edit) {
    new_byte = edit->new_end_byte + (new_byte - edit->old_end_byte);
    new_point = point_add(edit->new_end_point, point_sub(new_point, edit->old_end_point));
  } else if (starts_inside_edit) {
    new_byte = edit->new_end_byte;
    new_point = edit->new_end_point;
  }

  self->context[0] = new_byte;
  self->context[1] = new_point.row;
  self->context[2] = new_point.column;
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,265 @@
|
||||
#ifndef TREE_SITTER_PARSER_H_
|
||||
#define TREE_SITTER_PARSER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ts_builtin_sym_error ((TSSymbol)-1)
|
||||
#define ts_builtin_sym_end 0
|
||||
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
||||
|
||||
#ifndef TREE_SITTER_API_H_
|
||||
typedef uint16_t TSStateId;
|
||||
typedef uint16_t TSSymbol;
|
||||
typedef uint16_t TSFieldId;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
TSFieldId field_id;
|
||||
uint8_t child_index;
|
||||
bool inherited;
|
||||
} TSFieldMapEntry;
|
||||
|
||||
typedef struct {
|
||||
uint16_t index;
|
||||
uint16_t length;
|
||||
} TSFieldMapSlice;
|
||||
|
||||
typedef struct {
|
||||
bool visible;
|
||||
bool named;
|
||||
bool supertype;
|
||||
} TSSymbolMetadata;
|
||||
|
||||
typedef struct TSLexer TSLexer;
|
||||
|
||||
struct TSLexer {
|
||||
int32_t lookahead;
|
||||
TSSymbol result_symbol;
|
||||
void (*advance)(TSLexer *, bool);
|
||||
void (*mark_end)(TSLexer *);
|
||||
uint32_t (*get_column)(TSLexer *);
|
||||
bool (*is_at_included_range_start)(const TSLexer *);
|
||||
bool (*eof)(const TSLexer *);
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
TSParseActionTypeShift,
|
||||
TSParseActionTypeReduce,
|
||||
TSParseActionTypeAccept,
|
||||
TSParseActionTypeRecover,
|
||||
} TSParseActionType;
|
||||
|
||||
typedef union {
|
||||
struct {
|
||||
uint8_t type;
|
||||
TSStateId state;
|
||||
bool extra;
|
||||
bool repetition;
|
||||
} shift;
|
||||
struct {
|
||||
uint8_t type;
|
||||
uint8_t child_count;
|
||||
TSSymbol symbol;
|
||||
int16_t dynamic_precedence;
|
||||
uint16_t production_id;
|
||||
} reduce;
|
||||
uint8_t type;
|
||||
} TSParseAction;
|
||||
|
||||
typedef struct {
|
||||
uint16_t lex_state;
|
||||
uint16_t external_lex_state;
|
||||
} TSLexMode;
|
||||
|
||||
typedef union {
|
||||
TSParseAction action;
|
||||
struct {
|
||||
uint8_t count;
|
||||
bool reusable;
|
||||
} entry;
|
||||
} TSParseActionEntry;
|
||||
|
||||
typedef struct {
|
||||
int32_t start;
|
||||
int32_t end;
|
||||
} TSCharacterRange;
|
||||
|
||||
struct TSLanguage {
|
||||
uint32_t version;
|
||||
uint32_t symbol_count;
|
||||
uint32_t alias_count;
|
||||
uint32_t token_count;
|
||||
uint32_t external_token_count;
|
||||
uint32_t state_count;
|
||||
uint32_t large_state_count;
|
||||
uint32_t production_id_count;
|
||||
uint32_t field_count;
|
||||
uint16_t max_alias_sequence_length;
|
||||
const uint16_t *parse_table;
|
||||
const uint16_t *small_parse_table;
|
||||
const uint32_t *small_parse_table_map;
|
||||
const TSParseActionEntry *parse_actions;
|
||||
const char * const *symbol_names;
|
||||
const char * const *field_names;
|
||||
const TSFieldMapSlice *field_map_slices;
|
||||
const TSFieldMapEntry *field_map_entries;
|
||||
const TSSymbolMetadata *symbol_metadata;
|
||||
const TSSymbol *public_symbol_map;
|
||||
const uint16_t *alias_map;
|
||||
const TSSymbol *alias_sequences;
|
||||
const TSLexMode *lex_modes;
|
||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
||||
TSSymbol keyword_capture_token;
|
||||
struct {
|
||||
const bool *states;
|
||||
const TSSymbol *symbol_map;
|
||||
void *(*create)(void);
|
||||
void (*destroy)(void *);
|
||||
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
||||
unsigned (*serialize)(void *, char *);
|
||||
void (*deserialize)(void *, const char *, unsigned);
|
||||
} external_scanner;
|
||||
const TSStateId *primary_state_ids;
|
||||
};
|
||||
|
||||
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
|
||||
uint32_t index = 0;
|
||||
uint32_t size = len - index;
|
||||
while (size > 1) {
|
||||
uint32_t half_size = size / 2;
|
||||
uint32_t mid_index = index + half_size;
|
||||
TSCharacterRange *range = &ranges[mid_index];
|
||||
if (lookahead >= range->start && lookahead <= range->end) {
|
||||
return true;
|
||||
} else if (lookahead > range->end) {
|
||||
index = mid_index;
|
||||
}
|
||||
size -= half_size;
|
||||
}
|
||||
TSCharacterRange *range = &ranges[index];
|
||||
return (lookahead >= range->start && lookahead <= range->end);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lexer Macros
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define UNUSED __pragma(warning(suppress : 4101))
|
||||
#else
|
||||
#define UNUSED __attribute__((unused))
|
||||
#endif
|
||||
|
||||
#define START_LEXER() \
|
||||
bool result = false; \
|
||||
bool skip = false; \
|
||||
UNUSED \
|
||||
bool eof = false; \
|
||||
int32_t lookahead; \
|
||||
goto start; \
|
||||
next_state: \
|
||||
lexer->advance(lexer, skip); \
|
||||
start: \
|
||||
skip = false; \
|
||||
lookahead = lexer->lookahead;
|
||||
|
||||
#define ADVANCE(state_value) \
|
||||
{ \
|
||||
state = state_value; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ADVANCE_MAP(...) \
|
||||
{ \
|
||||
static const uint16_t map[] = { __VA_ARGS__ }; \
|
||||
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
|
||||
if (map[i] == lookahead) { \
|
||||
state = map[i + 1]; \
|
||||
goto next_state; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#define SKIP(state_value) \
|
||||
{ \
|
||||
skip = true; \
|
||||
state = state_value; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define ACCEPT_TOKEN(symbol_value) \
|
||||
result = true; \
|
||||
lexer->result_symbol = symbol_value; \
|
||||
lexer->mark_end(lexer);
|
||||
|
||||
#define END_STATE() return result;
|
||||
|
||||
/*
|
||||
* Parse Table Macros
|
||||
*/
|
||||
|
||||
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
|
||||
|
||||
#define STATE(id) id
|
||||
|
||||
#define ACTIONS(id) id
|
||||
|
||||
#define SHIFT(state_value) \
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.state = (state_value) \
|
||||
} \
|
||||
}}
|
||||
|
||||
#define SHIFT_REPEAT(state_value) \
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.state = (state_value), \
|
||||
.repetition = true \
|
||||
} \
|
||||
}}
|
||||
|
||||
#define SHIFT_EXTRA() \
|
||||
{{ \
|
||||
.shift = { \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.extra = true \
|
||||
} \
|
||||
}}
|
||||
|
||||
#define REDUCE(symbol_name, children, precedence, prod_id) \
|
||||
{{ \
|
||||
.reduce = { \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.symbol = symbol_name, \
|
||||
.child_count = children, \
|
||||
.dynamic_precedence = precedence, \
|
||||
.production_id = prod_id \
|
||||
}, \
|
||||
}}
|
||||
|
||||
#define RECOVER() \
|
||||
{{ \
|
||||
.type = TSParseActionTypeRecover \
|
||||
}}
|
||||
|
||||
#define ACCEPT_INPUT() \
|
||||
{{ \
|
||||
.type = TSParseActionTypeAccept \
|
||||
}}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_PARSER_H_
|
@ -0,0 +1,62 @@
|
||||
#ifndef TREE_SITTER_POINT_H_
|
||||
#define TREE_SITTER_POINT_H_
|
||||
|
||||
#include "tree_sitter/api.h"
|
||||
|
||||
#define POINT_ZERO ((TSPoint) {0, 0})
|
||||
#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
|
||||
|
||||
static inline TSPoint point__new(unsigned row, unsigned column) {
|
||||
TSPoint result = {row, column};
|
||||
return result;
|
||||
}
|
||||
|
||||
// Add the offset `b` to the position `a`.
//
// If `b` spans no rows, its column is added to `a`'s column on the same
// row. Otherwise the rows are summed and the column is taken from `b`.
static inline TSPoint point_add(TSPoint a, TSPoint b) {
  if (b.row == 0) {
    return point__new(a.row, a.column + b.column);
  }
  return point__new(a.row + b.row, b.column);
}
|
||||
|
||||
// Compute the offset from position `b` to position `a`.
//
// If `a` is on a later row than `b`, the result is the row difference
// together with `a`'s column. Otherwise the result has row 0 and the
// column difference.
//
// The column difference saturates at 0: the columns are unsigned, so a
// plain subtraction with a.column < b.column would wrap around to a value
// close to UINT32_MAX instead of signalling "no distance".
static inline TSPoint point_sub(TSPoint a, TSPoint b) {
  if (a.row > b.row) {
    return point__new(a.row - b.row, a.column);
  }
  uint32_t column = (a.column >= b.column) ? a.column - b.column : 0;
  return point__new(0, column);
}
|
||||
|
||||
// True when `a` comes before or equals `b`, comparing rows first and
// columns second.
static inline bool point_lte(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column <= b.column;
}
|
||||
|
||||
// True when `a` comes strictly before `b`, comparing rows first and
// columns second.
static inline bool point_lt(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column < b.column;
}
|
||||
|
||||
// True when `a` comes strictly after `b`, comparing rows first and
// columns second.
static inline bool point_gt(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row > b.row;
  return a.column > b.column;
}
|
||||
|
||||
// True when `a` comes after or equals `b`, comparing rows first and
// columns second.
static inline bool point_gte(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row > b.row;
  return a.column >= b.column;
}
|
||||
|
||||
// True when both points have the same row and the same column.
static inline bool point_eq(TSPoint a, TSPoint b) {
  bool differs = a.row != b.row || a.column != b.column;
  return !differs;
}
|
||||
|
||||
// Return whichever point comes first (rows compared before columns).
// When the points are equal, `b` is returned.
static inline TSPoint point_min(TSPoint a, TSPoint b) {
  bool a_is_earlier = (a.row != b.row) ? (a.row < b.row) : (a.column < b.column);
  return a_is_earlier ? a : b;
}
|
||||
|
||||
// Return whichever point comes last (rows compared before columns).
// When the points are equal, `b` is returned.
static inline TSPoint point_max(TSPoint a, TSPoint b) {
  bool a_is_later = (a.row != b.row) ? (a.row > b.row) : (a.column > b.column);
  return a_is_later ? a : b;
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,34 @@
|
||||
#ifndef TREE_SITTER_REDUCE_ACTION_H_
|
||||
#define TREE_SITTER_REDUCE_ACTION_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./array.h"
|
||||
#include "tree_sitter/api.h"
|
||||
|
||||
typedef struct {
|
||||
uint32_t count;
|
||||
TSSymbol symbol;
|
||||
int dynamic_precedence;
|
||||
unsigned short production_id;
|
||||
} ReduceAction;
|
||||
|
||||
typedef Array(ReduceAction) ReduceActionSet;
|
||||
|
||||
static inline void ts_reduce_action_set_add(ReduceActionSet *self,
|
||||
ReduceAction new_action) {
|
||||
for (uint32_t i = 0; i < self->size; i++) {
|
||||
ReduceAction action = self->contents[i];
|
||||
if (action.symbol == new_action.symbol && action.count == new_action.count)
|
||||
return;
|
||||
}
|
||||
array_push(self, new_action);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_REDUCE_ACTION_H_
|
@ -0,0 +1,95 @@
|
||||
#include "./subtree.h"
|
||||
|
||||
typedef struct {
|
||||
Subtree tree;
|
||||
uint32_t child_index;
|
||||
uint32_t byte_offset;
|
||||
} StackEntry;
|
||||
|
||||
typedef struct {
|
||||
Array(StackEntry) stack;
|
||||
Subtree last_external_token;
|
||||
} ReusableNode;
|
||||
|
||||
static inline ReusableNode reusable_node_new(void) {
|
||||
return (ReusableNode) {array_new(), NULL_SUBTREE};
|
||||
}
|
||||
|
||||
// Reset `self` to its empty state: forget the last external token and
// clear the entry stack.
static inline void reusable_node_clear(ReusableNode *self) {
  self->last_external_token = NULL_SUBTREE;
  array_clear(&self->stack);
}
|
||||
|
||||
// Subtree stored in the top stack entry, or NULL_SUBTREE when the stack is
// empty.
static inline Subtree reusable_node_tree(ReusableNode *self) {
  if (self->stack.size == 0) return NULL_SUBTREE;
  return self->stack.contents[self->stack.size - 1].tree;
}
|
||||
|
||||
// Byte offset recorded in the top stack entry, or UINT32_MAX when the
// stack is empty.
static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
  if (self->stack.size == 0) return UINT32_MAX;
  return self->stack.contents[self->stack.size - 1].byte_offset;
}
|
||||
|
||||
// Release the storage owned by the entry stack of `self`.
static inline void reusable_node_delete(ReusableNode *self) {
  array_delete(&self->stack);
}
|
||||
|
||||
// Move to the subtree that follows the current one.
//
// The current (top) entry is popped, and further ancestors are popped for
// as long as they have no child after the one just left. If the stack
// empties, the walk is finished and the stack stays empty. Otherwise the
// next child of the surviving ancestor is pushed, positioned right after
// the end of the subtree that was current on entry.
static inline void reusable_node_advance(ReusableNode *self) {
  StackEntry current = *array_back(&self->stack);
  uint32_t next_byte_offset = current.byte_offset + ts_subtree_total_bytes(current.tree);

  // Remember the most recent external token that has been passed.
  if (ts_subtree_has_external_tokens(current.tree)) {
    self->last_external_token = ts_subtree_last_external_token(current.tree);
  }

  Subtree parent;
  uint32_t next_index;
  for (;;) {
    StackEntry popped = array_pop(&self->stack);
    next_index = popped.child_index + 1;
    if (self->stack.size == 0) return;
    parent = array_back(&self->stack)->tree;
    if (ts_subtree_child_count(parent) > next_index) break;
  }

  StackEntry next_entry = {
    .tree = ts_subtree_children(parent)[next_index],
    .child_index = next_index,
    .byte_offset = next_byte_offset,
  };
  array_push(&self->stack, next_entry);
}
|
||||
|
||||
// Step into the first child of the current subtree. The child starts at
// the same byte offset as its parent. Returns false, leaving the stack
// unchanged, when the current subtree has no children.
static inline bool reusable_node_descend(ReusableNode *self) {
  StackEntry current = *array_back(&self->stack);
  if (ts_subtree_child_count(current.tree) == 0) return false;

  StackEntry first_child = {
    .tree = ts_subtree_children(current.tree)[0],
    .child_index = 0,
    .byte_offset = current.byte_offset,
  };
  array_push(&self->stack, first_child);
  return true;
}
|
||||
|
||||
// Descend through first children until a subtree without children is
// reached, then advance past it.
static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
  bool went_deeper;
  do {
    went_deeper = reusable_node_descend(self);
  } while (went_deeper);
  reusable_node_advance(self);
}
|
||||
|
||||
// Restart the walk over `tree`, positioned on the root's first child.
// If the root has no children, `self` is left empty.
static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
  reusable_node_clear(self);

  StackEntry root_entry = {
    .tree = tree,
    .child_index = 0,
    .byte_offset = 0,
  };
  array_push(&self->stack, root_entry);

  // The root node itself is never offered for reuse: accepting a tree
  // applies transformations (adding the EOF child and any extra children)
  // that give the root a non-standard internal structure. So step straight
  // into it, and give up if there is nothing inside.
  bool has_children = reusable_node_descend(self);
  if (!has_children) {
    reusable_node_clear(self);
  }
}
|
@ -0,0 +1,899 @@
|
||||
#include "./alloc.h"
|
||||
#include "./language.h"
|
||||
#include "./subtree.h"
|
||||
#include "./array.h"
|
||||
#include "./stack.h"
|
||||
#include "./length.h"
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define MAX_LINK_COUNT 8
|
||||
#define MAX_NODE_POOL_SIZE 50
|
||||
#define MAX_ITERATOR_COUNT 64
|
||||
|
||||
#if defined _WIN32 && !defined __GNUC__
|
||||
#define forceinline __forceinline
|
||||
#else
|
||||
#define forceinline static inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
typedef struct StackNode StackNode;
|
||||
|
||||
typedef struct {
|
||||
StackNode *node;
|
||||
Subtree subtree;
|
||||
bool is_pending;
|
||||
} StackLink;
|
||||
|
||||
struct StackNode {
|
||||
TSStateId state;
|
||||
Length position;
|
||||
StackLink links[MAX_LINK_COUNT];
|
||||
short unsigned int link_count;
|
||||
uint32_t ref_count;
|
||||
unsigned error_cost;
|
||||
unsigned node_count;
|
||||
int dynamic_precedence;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
StackNode *node;
|
||||
SubtreeArray subtrees;
|
||||
uint32_t subtree_count;
|
||||
bool is_pending;
|
||||
} StackIterator;
|
||||
|
||||
typedef Array(StackNode *) StackNodeArray;
|
||||
|
||||
typedef enum {
|
||||
StackStatusActive,
|
||||
StackStatusPaused,
|
||||
StackStatusHalted,
|
||||
} StackStatus;
|
||||
|
||||
typedef struct {
|
||||
StackNode *node;
|
||||
StackSummary *summary;
|
||||
unsigned node_count_at_last_error;
|
||||
Subtree last_external_token;
|
||||
Subtree lookahead_when_paused;
|
||||
StackStatus status;
|
||||
} StackHead;
|
||||
|
||||
struct Stack {
|
||||
Array(StackHead) heads;
|
||||
StackSliceArray slices;
|
||||
Array(StackIterator) iterators;
|
||||
StackNodeArray node_pool;
|
||||
StackNode *base_node;
|
||||
SubtreePool *subtree_pool;
|
||||
};
|
||||
|
||||
typedef unsigned StackAction;
|
||||
enum {
|
||||
StackActionNone,
|
||||
StackActionStop = 1,
|
||||
StackActionPop = 2,
|
||||
};
|
||||
|
||||
typedef StackAction (*StackCallback)(void *, const StackIterator *);
|
||||
|
||||
// Take an additional reference to `self`. A NULL node is ignored.
static void stack_node_retain(StackNode *self) {
  if (self == NULL) return;

  // A node that is being retained must still be alive.
  assert(self->ref_count > 0);
  self->ref_count++;
  // Catch the counter wrapping around to zero.
  assert(self->ref_count != 0);
}
|
||||
|
||||
static void stack_node_release(
|
||||
StackNode *self,
|
||||
StackNodeArray *pool,
|
||||
SubtreePool *subtree_pool
|
||||
) {
|
||||
recur:
|
||||
assert(self->ref_count != 0);
|
||||
self->ref_count--;
|
||||
if (self->ref_count > 0) return;
|
||||
|
||||
StackNode *first_predecessor = NULL;
|
||||
if (self->link_count > 0) {
|
||||
for (unsigned i = self->link_count - 1; i > 0; i--) {
|
||||
StackLink link = self->links[i];
|
||||
if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
|
||||
stack_node_release(link.node, pool, subtree_pool);
|
||||
}
|
||||
StackLink link = self->links[0];
|
||||
if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
|
||||
first_predecessor = self->links[0].node;
|
||||
}
|
||||
|
||||
if (pool->size < MAX_NODE_POOL_SIZE) {
|
||||
array_push(pool, self);
|
||||
} else {
|
||||
ts_free(self);
|
||||
}
|
||||
|
||||
if (first_predecessor) {
|
||||
self = first_predecessor;
|
||||
goto recur;
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of nodes in the subtree, for the purpose of measuring
|
||||
/// how much progress has been made by a given version of the stack.
|
||||
static uint32_t stack__subtree_node_count(Subtree subtree) {
|
||||
uint32_t count = ts_subtree_visible_descendant_count(subtree);
|
||||
if (ts_subtree_visible(subtree)) count++;
|
||||
|
||||
// Count intermediate error nodes even though they are not visible,
|
||||
// because a stack version's node count is used to check whether it
|
||||
// has made any progress since the last time it encountered an error.
|
||||
if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) count++;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static StackNode *stack_node_new(
|
||||
StackNode *previous_node,
|
||||
Subtree subtree,
|
||||
bool is_pending,
|
||||
TSStateId state,
|
||||
StackNodeArray *pool
|
||||
) {
|
||||
StackNode *node = pool->size > 0
|
||||
? array_pop(pool)
|
||||
: ts_malloc(sizeof(StackNode));
|
||||
*node = (StackNode) {
|
||||
.ref_count = 1,
|
||||
.link_count = 0,
|
||||
.state = state
|
||||
};
|
||||
|
||||
if (previous_node) {
|
||||
node->link_count = 1;
|
||||
node->links[0] = (StackLink) {
|
||||
.node = previous_node,
|
||||
.subtree = subtree,
|
||||
.is_pending = is_pending,
|
||||
};
|
||||
|
||||
node->position = previous_node->position;
|
||||
node->error_cost = previous_node->error_cost;
|
||||
node->dynamic_precedence = previous_node->dynamic_precedence;
|
||||
node->node_count = previous_node->node_count;
|
||||
|
||||
if (subtree.ptr) {
|
||||
node->error_cost += ts_subtree_error_cost(subtree);
|
||||
node->position = length_add(node->position, ts_subtree_total_size(subtree));
|
||||
node->node_count += stack__subtree_node_count(subtree);
|
||||
node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
|
||||
}
|
||||
} else {
|
||||
node->position = length_zero();
|
||||
node->error_cost = 0;
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
// Decide whether two link subtrees are interchangeable for the purpose of
// merging stack links.
static bool stack__subtree_is_equivalent(Subtree left, Subtree right) {
  // Identical pointers (including two null subtrees) are equivalent;
  // a null subtree never matches a non-null one.
  if (left.ptr == right.ptr) return true;
  if (!left.ptr || !right.ptr) return false;

  // Symbols must match.
  if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false;

  // If both have errors, don't bother keeping both.
  bool both_have_errors =
    ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0;
  if (both_have_errors) return true;

  // Otherwise they must agree on padding, size, child count, extra-ness
  // and external scanner state.
  if (ts_subtree_padding(left).bytes != ts_subtree_padding(right).bytes) return false;
  if (ts_subtree_size(left).bytes != ts_subtree_size(right).bytes) return false;
  if (ts_subtree_child_count(left) != ts_subtree_child_count(right)) return false;
  if (ts_subtree_extra(left) != ts_subtree_extra(right)) return false;
  return ts_subtree_external_scanner_state_eq(left, right);
}
|
||||
|
||||
static void stack_node_add_link(
|
||||
StackNode *self,
|
||||
StackLink link,
|
||||
SubtreePool *subtree_pool
|
||||
) {
|
||||
if (link.node == self) return;
|
||||
|
||||
for (int i = 0; i < self->link_count; i++) {
|
||||
StackLink *existing_link = &self->links[i];
|
||||
if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) {
|
||||
// In general, we preserve ambiguities until they are removed from the stack
|
||||
// during a pop operation where multiple paths lead to the same node. But in
|
||||
// the special case where two links directly connect the same pair of nodes,
|
||||
// we can safely remove the ambiguity ahead of time without changing behavior.
|
||||
if (existing_link->node == link.node) {
|
||||
if (
|
||||
ts_subtree_dynamic_precedence(link.subtree) >
|
||||
ts_subtree_dynamic_precedence(existing_link->subtree)
|
||||
) {
|
||||
ts_subtree_retain(link.subtree);
|
||||
ts_subtree_release(subtree_pool, existing_link->subtree);
|
||||
existing_link->subtree = link.subtree;
|
||||
self->dynamic_precedence =
|
||||
link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// If the previous nodes are mergeable, merge them recursively.
|
||||
if (
|
||||
existing_link->node->state == link.node->state &&
|
||||
existing_link->node->position.bytes == link.node->position.bytes &&
|
||||
existing_link->node->error_cost == link.node->error_cost
|
||||
) {
|
||||
for (int j = 0; j < link.node->link_count; j++) {
|
||||
stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
|
||||
}
|
||||
int32_t dynamic_precedence = link.node->dynamic_precedence;
|
||||
if (link.subtree.ptr) {
|
||||
dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
|
||||
}
|
||||
if (dynamic_precedence > self->dynamic_precedence) {
|
||||
self->dynamic_precedence = dynamic_precedence;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (self->link_count == MAX_LINK_COUNT) return;
|
||||
|
||||
stack_node_retain(link.node);
|
||||
unsigned node_count = link.node->node_count;
|
||||
int dynamic_precedence = link.node->dynamic_precedence;
|
||||
self->links[self->link_count++] = link;
|
||||
|
||||
if (link.subtree.ptr) {
|
||||
ts_subtree_retain(link.subtree);
|
||||
node_count += stack__subtree_node_count(link.subtree);
|
||||
dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
|
||||
}
|
||||
|
||||
if (node_count > self->node_count) self->node_count = node_count;
|
||||
if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence;
|
||||
}
|
||||
|
||||
static void stack_head_delete(
|
||||
StackHead *self,
|
||||
StackNodeArray *pool,
|
||||
SubtreePool *subtree_pool
|
||||
) {
|
||||
if (self->node) {
|
||||
if (self->last_external_token.ptr) {
|
||||
ts_subtree_release(subtree_pool, self->last_external_token);
|
||||
}
|
||||
if (self->lookahead_when_paused.ptr) {
|
||||
ts_subtree_release(subtree_pool, self->lookahead_when_paused);
|
||||
}
|
||||
if (self->summary) {
|
||||
array_delete(self->summary);
|
||||
ts_free(self->summary);
|
||||
}
|
||||
stack_node_release(self->node, pool, subtree_pool);
|
||||
}
|
||||
}
|
||||
|
||||
static StackVersion ts_stack__add_version(
|
||||
Stack *self,
|
||||
StackVersion original_version,
|
||||
StackNode *node
|
||||
) {
|
||||
StackHead head = {
|
||||
.node = node,
|
||||
.node_count_at_last_error = self->heads.contents[original_version].node_count_at_last_error,
|
||||
.last_external_token = self->heads.contents[original_version].last_external_token,
|
||||
.status = StackStatusActive,
|
||||
.lookahead_when_paused = NULL_SUBTREE,
|
||||
};
|
||||
array_push(&self->heads, head);
|
||||
stack_node_retain(node);
|
||||
if (head.last_external_token.ptr) ts_subtree_retain(head.last_external_token);
|
||||
return (StackVersion)(self->heads.size - 1);
|
||||
}
|
||||
|
||||
static void ts_stack__add_slice(
|
||||
Stack *self,
|
||||
StackVersion original_version,
|
||||
StackNode *node,
|
||||
SubtreeArray *subtrees
|
||||
) {
|
||||
for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
|
||||
StackVersion version = self->slices.contents[i].version;
|
||||
if (self->heads.contents[version].node == node) {
|
||||
StackSlice slice = {*subtrees, version};
|
||||
array_insert(&self->slices, i + 1, slice);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
StackVersion version = ts_stack__add_version(self, original_version, node);
|
||||
StackSlice slice = { *subtrees, version };
|
||||
array_push(&self->slices, slice);
|
||||
}
|
||||
|
||||
static StackSliceArray stack__iter(
|
||||
Stack *self,
|
||||
StackVersion version,
|
||||
StackCallback callback,
|
||||
void *payload,
|
||||
int goal_subtree_count
|
||||
) {
|
||||
array_clear(&self->slices);
|
||||
array_clear(&self->iterators);
|
||||
|
||||
StackHead *head = array_get(&self->heads, version);
|
||||
StackIterator new_iterator = {
|
||||
.node = head->node,
|
||||
.subtrees = array_new(),
|
||||
.subtree_count = 0,
|
||||
.is_pending = true,
|
||||
};
|
||||
|
||||
bool include_subtrees = false;
|
||||
if (goal_subtree_count >= 0) {
|
||||
include_subtrees = true;
|
||||
array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree));
|
||||
}
|
||||
|
||||
array_push(&self->iterators, new_iterator);
|
||||
|
||||
while (self->iterators.size > 0) {
|
||||
for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
|
||||
StackIterator *iterator = &self->iterators.contents[i];
|
||||
StackNode *node = iterator->node;
|
||||
|
||||
StackAction action = callback(payload, iterator);
|
||||
bool should_pop = action & StackActionPop;
|
||||
bool should_stop = action & StackActionStop || node->link_count == 0;
|
||||
|
||||
if (should_pop) {
|
||||
SubtreeArray subtrees = iterator->subtrees;
|
||||
if (!should_stop) {
|
||||
ts_subtree_array_copy(subtrees, &subtrees);
|
||||
}
|
||||
ts_subtree_array_reverse(&subtrees);
|
||||
ts_stack__add_slice(
|
||||
self,
|
||||
version,
|
||||
node,
|
||||
&subtrees
|
||||
);
|
||||
}
|
||||
|
||||
if (should_stop) {
|
||||
if (!should_pop) {
|
||||
ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
|
||||
}
|
||||
array_erase(&self->iterators, i);
|
||||
i--, size--;
|
||||
continue;
|
||||
}
|
||||
|
||||
for (uint32_t j = 1; j <= node->link_count; j++) {
|
||||
StackIterator *next_iterator;
|
||||
StackLink link;
|
||||
if (j == node->link_count) {
|
||||
link = node->links[0];
|
||||
next_iterator = &self->iterators.contents[i];
|
||||
} else {
|
||||
if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
|
||||
link = node->links[j];
|
||||
StackIterator current_iterator = self->iterators.contents[i];
|
||||
array_push(&self->iterators, current_iterator);
|
||||
next_iterator = array_back(&self->iterators);
|
||||
ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
|
||||
}
|
||||
|
||||
next_iterator->node = link.node;
|
||||
if (link.subtree.ptr) {
|
||||
if (include_subtrees) {
|
||||
array_push(&next_iterator->subtrees, link.subtree);
|
||||
ts_subtree_retain(link.subtree);
|
||||
}
|
||||
|
||||
if (!ts_subtree_extra(link.subtree)) {
|
||||
next_iterator->subtree_count++;
|
||||
if (!link.is_pending) {
|
||||
next_iterator->is_pending = false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
next_iterator->subtree_count++;
|
||||
next_iterator->is_pending = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return self->slices;
|
||||
}
|
||||
|
||||
// Create a stack that uses `subtree_pool` when releasing subtrees.
//
// All internal arrays are pre-sized, a base node is created, and the stack
// is reset via `ts_stack_clear` before being returned.
Stack *ts_stack_new(SubtreePool *subtree_pool) {
  Stack *stack = ts_calloc(1, sizeof(Stack));

  array_init(&stack->heads);
  array_init(&stack->slices);
  array_init(&stack->iterators);
  array_init(&stack->node_pool);

  array_reserve(&stack->heads, 4);
  array_reserve(&stack->slices, 4);
  array_reserve(&stack->iterators, 4);
  array_reserve(&stack->node_pool, MAX_NODE_POOL_SIZE);

  stack->subtree_pool = subtree_pool;
  stack->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &stack->node_pool);

  ts_stack_clear(stack);
  return stack;
}
|
||||
|
||||
// Release the memory reserved for a given stack: its scratch arrays, the
// base node, every head, the pooled nodes, and finally the stack itself.
void ts_stack_delete(Stack *self) {
  if (self->slices.contents) {
    array_delete(&self->slices);
  }
  if (self->iterators.contents) {
    array_delete(&self->iterators);
  }

  stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);

  for (uint32_t head_index = 0; head_index < self->heads.size; head_index++) {
    StackHead *head = &self->heads.contents[head_index];
    stack_head_delete(head, &self->node_pool, self->subtree_pool);
  }
  array_clear(&self->heads);

  // Entries in the node pool are individually allocated and freed one by
  // one before the pool array itself.
  if (self->node_pool.contents) {
    for (uint32_t pool_index = 0; pool_index < self->node_pool.size; pool_index++) {
      ts_free(self->node_pool.contents[pool_index]);
    }
    array_delete(&self->node_pool);
  }

  array_delete(&self->heads);
  ts_free(self);
}
|
||||
|
||||
uint32_t ts_stack_version_count(const Stack *self) {
|
||||
return self->heads.size;
|
||||
}
|
||||
|
||||
// Get the parse state stored on the top node of the given stack version.
TSStateId ts_stack_state(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->node->state;
}
|
||||
|
||||
// Get the position stored on the top node of the given stack version.
Length ts_stack_position(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->node->position;
}
|
||||
|
||||
// Get the last external token associated with a given version of the stack.
// No reference is added for the caller.
Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->last_external_token;
}
|
||||
|
||||
// Set the last external token associated with a given version of the stack.
//
// The new token is retained before the old one is released, so passing the
// token that is already stored never drops its last reference.
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) {
  StackHead *head = array_get(&self->heads, version);
  Subtree previous = head->last_external_token;

  if (token.ptr) {
    ts_subtree_retain(token);
  }
  if (previous.ptr) {
    ts_subtree_release(self->subtree_pool, previous);
  }

  head->last_external_token = token;
}
|
||||
|
||||
// Get the total cost of all errors on the given version of the stack.
//
// ERROR_COST_PER_RECOVERY is added on top of the node's own cost when the
// head is paused, or when it sits in ERROR_STATE and its first link carries
// no subtree.
unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  const StackNode *top = head->node;

  bool is_paused = head->status == StackStatusPaused;
  bool needs_recovery_penalty =
    is_paused || (top->state == ERROR_STATE && !top->links[0].subtree.ptr);

  unsigned cost = top->error_cost;
  if (needs_recovery_penalty) {
    cost += ERROR_COST_PER_RECOVERY;
  }
  return cost;
}
|
||||
|
||||
// Get the number of tree nodes added to this version of the stack since
// the last error was detected.
//
// If the top node's count has dropped below the recorded baseline, the
// baseline is lowered to match first, so the subtraction cannot underflow.
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  unsigned current_count = head->node->node_count;

  if (current_count < head->node_count_at_last_error) {
    head->node_count_at_last_error = current_count;
  }

  return current_count - head->node_count_at_last_error;
}
|
||||
|
||||
void ts_stack_push(
|
||||
Stack *self,
|
||||
StackVersion version,
|
||||
Subtree subtree,
|
||||
bool pending,
|
||||
TSStateId state
|
||||
) {
|
||||
StackHead *head = array_get(&self->heads, version);
|
||||
StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
|
||||
if (!subtree.ptr) head->node_count_at_last_error = new_node->node_count;
|
||||
head->node = new_node;
|
||||
}
|
||||
|
||||
// Iteration callback for `ts_stack_pop_count`: pop and stop once the
// iterator has passed exactly the requested number of subtrees. `payload`
// points at that requested count.
forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) {
  unsigned *target_count = payload;
  if (iterator->subtree_count != *target_count) {
    return StackActionNone;
  }
  return StackActionPop | StackActionStop;
}
|
||||
|
||||
// Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple
// versions which had previously been merged. It returns an array that
// specifies the index of each revealed version and the trees that were
// removed from that version.
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
  int subtree_goal = (int)count;
  return stack__iter(self, version, pop_count_callback, &count, subtree_goal);
}
|
||||
|
||||
// Iteration callback for `ts_stack_pop_pending`: keep walking until one
// subtree has been passed, then stop, popping only if the iterator is
// still marked pending.
forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) {
  (void)payload;

  if (iterator->subtree_count < 1) {
    return StackActionNone;
  }
  if (!iterator->is_pending) {
    return StackActionStop;
  }
  return StackActionPop | StackActionStop;
}
|
||||
|
||||
// Remove any pending trees from the top of the given version of the stack.
//
// When something was popped, the version created for the first slice is
// renumbered onto `version`, so the caller keeps using the same index.
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
  StackSliceArray slices = stack__iter(self, version, pop_pending_callback, NULL, 0);
  if (slices.size == 0) {
    return slices;
  }

  StackSlice *first = &slices.contents[0];
  ts_stack_renumber_version(self, first->version, version);
  first->version = version;
  return slices;
}
|
||||
|
||||
// Iteration callback for `ts_stack_pop_error`. `payload` points at a bool
// recording whether an error has already been popped.
//
// Nothing happens until the iterator has collected a subtree. After that
// every path stops, and only the first path whose first collected subtree
// is an error is popped.
forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) {
  if (iterator->subtrees.size == 0) {
    return StackActionNone;
  }

  bool *already_found = payload;
  if (*already_found || !ts_subtree_is_error(iterator->subtrees.contents[0])) {
    return StackActionStop;
  }

  *already_found = true;
  return StackActionPop | StackActionStop;
}
|
||||
|
||||
// Remove an error at the top of the given version of the stack.
//
// Returns the popped subtrees, or an empty array when none of the top
// node's links carries an error subtree or nothing could be popped.
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
  StackNode *top = array_get(&self->heads, version)->node;

  // Only iterate when at least one link of the top node holds an error.
  bool has_error_link = false;
  for (unsigned link_index = 0; link_index < top->link_count; link_index++) {
    Subtree candidate = top->links[link_index].subtree;
    if (candidate.ptr && ts_subtree_is_error(candidate)) {
      has_error_link = true;
      break;
    }
  }

  if (has_error_link) {
    bool found_error = false;
    StackSliceArray slices = stack__iter(self, version, pop_error_callback, &found_error, 1);
    if (slices.size > 0) {
      assert(slices.size == 1);
      ts_stack_renumber_version(self, slices.contents[0].version, version);
      return slices.contents[0].subtrees;
    }
  }

  return (SubtreeArray) {.size = 0};
}
|
||||
|
||||
// Iteration callback for `ts_stack_pop_all`: pop only when the iterator has
// reached a node with no further links.
forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) {
  (void)payload;

  if (iterator->node->link_count == 0) {
    return StackActionPop;
  }
  return StackActionNone;
}
|
||||
|
||||
// Remove all trees from the given version of the stack.
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
  StackSliceArray slices = stack__iter(self, version, pop_all_callback, NULL, 0);
  return slices;
}
|
||||
|
||||
typedef struct {
|
||||
StackSummary *summary;
|
||||
unsigned max_depth;
|
||||
} SummarizeStackSession;
|
||||
|
||||
// Iteration callback for `ts_stack_record_summary`.
//
// Appends a (position, depth, state) entry for the iterator's node unless an
// entry with the same depth and state already exists, and stops iterators
// that have gone deeper than the session's `max_depth`.
forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
  SummarizeStackSession *session = payload;
  StackSummary *summary = session->summary;
  TSStateId state = iterator->node->state;
  unsigned depth = iterator->subtree_count;

  if (depth > session->max_depth) {
    return StackActionStop;
  }

  // Scan newest-to-oldest for a duplicate, giving up at the first entry
  // recorded at a shallower depth.
  for (unsigned remaining = summary->size; remaining > 0; remaining--) {
    StackSummaryEntry existing = summary->contents[remaining - 1];
    if (existing.depth < depth) break;
    if (existing.depth == depth && existing.state == state) {
      return StackActionNone;
    }
  }

  StackSummaryEntry fresh_entry = {
    .position = iterator->node->position,
    .depth = depth,
    .state = state,
  };
  array_push(summary, fresh_entry);
  return StackActionNone;
}
|
||||
|
||||
// Compute a summary of all the parse states near the top of the given
// version of the stack and store the summary for later retrieval.
//
// Any summary previously stored on the version is freed and replaced.
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
  StackSummary *fresh_summary = ts_malloc(sizeof(StackSummary));
  array_init(fresh_summary);

  SummarizeStackSession session = {
    .summary = fresh_summary,
    .max_depth = max_depth
  };
  // A negative subtree goal tells the iterator not to collect subtrees.
  stack__iter(self, version, summarize_stack_callback, &session, -1);

  StackHead *head = &self->heads.contents[version];
  StackSummary *stale_summary = head->summary;
  if (stale_summary) {
    array_delete(stale_summary);
    ts_free(stale_summary);
  }
  head->summary = session.summary;
}
|
||||
|
||||
// Retrieve the summary stored on the given version of the stack by
// `ts_stack_record_summary`. The pointer stays owned by the stack.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->summary;
}
|
||||
|
||||
// Get the dynamic precedence stored on the top node of the given version.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->dynamic_precedence;
}
|
||||
|
||||
// Report whether the given version has consumed any bytes since its last
// error.
//
// A version whose top node has no error cost trivially qualifies. Otherwise
// the first link of each node is followed downward: a subtree with a
// non-zero total byte count means progress; an empty, error-free subtree on
// a node newer than the last error is skipped; anything else ends the
// search with `false`.
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  const StackNode *cursor = head->node;
  if (cursor->error_cost == 0) return true;

  while (cursor) {
    if (cursor->link_count == 0) break;

    Subtree subtree = cursor->links[0].subtree;
    if (!subtree.ptr) break;

    if (ts_subtree_total_bytes(subtree) > 0) return true;

    // Only step past zero-width subtrees that were pushed after the last
    // error and carry no error cost themselves.
    if (cursor->node_count <= head->node_count_at_last_error) break;
    if (ts_subtree_error_cost(subtree) != 0) break;

    cursor = cursor->links[0].node;
  }

  return false;
}
|
||||
|
||||
// Remove the given version from the stack, releasing what its head holds.
// Erasing the head from the array shifts every later version down by one.
void ts_stack_remove_version(Stack *self, StackVersion version) {
  StackHead *doomed = array_get(&self->heads, version);
  stack_head_delete(doomed, &self->node_pool, self->subtree_pool);
  array_erase(&self->heads, version);
}
|
||||
|
||||
// Move version `v1` into slot `v2`, discarding what `v2` held.
//
// `v2` must be lower than `v1`. If only the target has a summary, it is
// carried over to the moved head instead of being destroyed with the target.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
  if (v1 == v2) return;
  assert(v2 < v1);
  assert((uint32_t)v1 < self->heads.size);

  StackHead *moved = &self->heads.contents[v1];
  StackHead *replaced = &self->heads.contents[v2];

  if (replaced->summary && !moved->summary) {
    moved->summary = replaced->summary;
    replaced->summary = NULL;
  }

  stack_head_delete(replaced, &self->node_pool, self->subtree_pool);
  *replaced = *moved;
  array_erase(&self->heads, v1);
}
|
||||
|
||||
// Exchange the heads stored at versions `v1` and `v2`.
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
  StackHead *first = &self->heads.contents[v1];
  StackHead *second = &self->heads.contents[v2];

  StackHead saved = *first;
  *first = *second;
  *second = saved;
}
|
||||
|
||||
// Append a copy of the given version and return the new version's index.
//
// The copy shares the original's top node and last external token (both
// retained for the copy) but starts without a summary.
StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
  assert(version < self->heads.size);

  StackHead duplicate = self->heads.contents[version];
  duplicate.summary = NULL;
  array_push(&self->heads, duplicate);

  StackHead *copy = array_back(&self->heads);
  stack_node_retain(copy->node);
  if (copy->last_external_token.ptr) {
    ts_subtree_retain(copy->last_external_token);
  }

  return self->heads.size - 1;
}
|
||||
|
||||
// Merge the given two stack versions if possible, returning true if they
// were successfully merged and false otherwise.
//
// On success every link of `version2`'s top node is added to `version1`'s
// top node and `version2` is removed.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
  if (!ts_stack_can_merge(self, version1, version2)) {
    return false;
  }

  StackHead *kept = &self->heads.contents[version1];
  StackHead *absorbed = &self->heads.contents[version2];

  for (uint32_t link_index = 0; link_index < absorbed->node->link_count; link_index++) {
    stack_node_add_link(kept->node, absorbed->node->links[link_index], self->subtree_pool);
  }

  if (kept->node->state == ERROR_STATE) {
    kept->node_count_at_last_error = kept->node->node_count;
  }

  ts_stack_remove_version(self, version2);
  return true;
}
|
||||
|
||||
// Determine whether the given two stack versions can be merged.
//
// Both heads must be active, and their top nodes must agree on parse state,
// byte position and error cost; their last external tokens must also have
// equal scanner state.
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
  StackHead *left = &self->heads.contents[version1];
  StackHead *right = &self->heads.contents[version2];

  if (left->status != StackStatusActive) return false;
  if (right->status != StackStatusActive) return false;
  if (left->node->state != right->node->state) return false;
  if (left->node->position.bytes != right->node->position.bytes) return false;
  if (left->node->error_cost != right->node->error_cost) return false;

  return ts_subtree_external_scanner_state_eq(left->last_external_token, right->last_external_token);
}
|
||||
|
||||
// Mark the given version of the stack as halted.
void ts_stack_halt(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  head->status = StackStatusHalted;
}
|
||||
|
||||
// Mark the given version as paused, remembering `lookahead` so that
// `ts_stack_resume` can hand it back. The version's node-count baseline is
// reset to the current top node's count.
void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) {
  StackHead *head = array_get(&self->heads, version);
  head->node_count_at_last_error = head->node->node_count;
  head->lookahead_when_paused = lookahead;
  head->status = StackStatusPaused;
}
|
||||
|
||||
// Report whether the given version of the stack is active.
bool ts_stack_is_active(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusActive;
}
|
||||
|
||||
// Report whether the given version of the stack is halted.
bool ts_stack_is_halted(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusHalted;
}
|
||||
|
||||
// Report whether the given version of the stack is paused.
bool ts_stack_is_paused(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusPaused;
}
|
||||
|
||||
// Reactivate a paused version and return the lookahead that was stored
// when it was paused. The stored lookahead is cleared on the head.
Subtree ts_stack_resume(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  assert(head->status == StackStatusPaused);

  Subtree saved_lookahead = head->lookahead_when_paused;
  head->lookahead_when_paused = NULL_SUBTREE;
  head->status = StackStatusActive;
  return saved_lookahead;
}
|
||||
|
||||
// Reset the stack to a single active version whose head is the base node.
//
// The base node is retained up front, before the existing heads are
// deleted, on behalf of the new head that will point at it.
void ts_stack_clear(Stack *self) {
  stack_node_retain(self->base_node);

  for (uint32_t head_index = 0; head_index < self->heads.size; head_index++) {
    StackHead *head = &self->heads.contents[head_index];
    stack_head_delete(head, &self->node_pool, self->subtree_pool);
  }
  array_clear(&self->heads);

  StackHead initial_head = {
    .node = self->base_node,
    .status = StackStatusActive,
    .last_external_token = NULL_SUBTREE,
    .lookahead_when_paused = NULL_SUBTREE,
  };
  array_push(&self->heads, initial_head);
}
|
||||
|
||||
// Write a Graphviz DOT description of the whole stack to `f` (stderr when
// `f` is NULL). Halted versions are omitted. Always returns true.
//
// The stack's `iterators` array is reused as scratch space: it is cleared
// and refilled here.
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
  array_reserve(&self->iterators, 32);
  if (!f) f = stderr;

  fprintf(f, "digraph stack {\n");
  fprintf(f, "rankdir=\"RL\";\n");
  fprintf(f, "edge [arrowhead=none]\n");

  Array(StackNode *) visited_nodes = array_new();

  // Emit one invisible "head" node per live version, pointing at its top
  // node, and seed one iterator per version.
  array_clear(&self->iterators);
  for (uint32_t i = 0; i < self->heads.size; i++) {
    StackHead *head = &self->heads.contents[i];
    if (head->status == StackStatusHalted) continue;

    fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
    fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node);

    if (head->status == StackStatusPaused) {
      fprintf(f, "color=red ");
    }
    fprintf(f,
      "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
      i,
      ts_stack_node_count_since_error(self, i),
      ts_stack_error_cost(self, i)
    );

    if (head->summary) {
      fprintf(f, "\nsummary:");
      for (uint32_t j = 0; j < head->summary->size; j++) {
        fprintf(f, " %u", (unsigned)head->summary->contents[j].state);
      }
    }

    if (head->last_external_token.ptr) {
      const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
      const char *data = ts_external_scanner_state_data(state);
      fprintf(f, "\nexternal_scanner_state:");
      for (uint32_t j = 0; j < state->length; j++) {
        // The bytes are plain `char`; go through `unsigned char` so values
        // >= 0x80 are not sign-extended (e.g. printed as FFFFFF80) and the
        // argument matches the unsigned type that `%X` expects.
        fprintf(f, " %2X", (unsigned)(unsigned char)data[j]);
      }
    }

    fprintf(f, "\"]\n");
    array_push(&self->iterators, ((StackIterator) {
      .node = head->node
    }));
  }

  // Advance every iterator one node per pass until all of them sit on nodes
  // that have already been printed.
  bool all_iterators_done = false;
  while (!all_iterators_done) {
    all_iterators_done = true;

    for (uint32_t i = 0; i < self->iterators.size; i++) {
      // Copied by value: pushes below may reallocate the iterator array.
      StackIterator iterator = self->iterators.contents[i];
      StackNode *node = iterator.node;

      for (uint32_t j = 0; j < visited_nodes.size; j++) {
        if (visited_nodes.contents[j] == node) {
          node = NULL;
          break;
        }
      }

      if (!node) continue;
      all_iterators_done = false;

      fprintf(f, "node_%p [", (void *)node);
      if (node->state == ERROR_STATE) {
        fprintf(f, "label=\"?\"");
      } else if (
        node->link_count == 1 &&
        node->links[0].subtree.ptr &&
        ts_subtree_extra(node->links[0].subtree)
      ) {
        fprintf(f, "shape=point margin=0 label=\"\"");
      } else {
        fprintf(f, "label=\"%d\"", node->state);
      }

      fprintf(
        f,
        " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
        node->position.extent.row + 1,
        node->position.extent.column,
        node->node_count,
        node->error_cost,
        node->dynamic_precedence
      );

      for (int j = 0; j < node->link_count; j++) {
        StackLink link = node->links[j];
        fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node);
        if (link.is_pending) fprintf(f, "style=dashed ");
        if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray ");

        if (!link.subtree.ptr) {
          fprintf(f, "color=red");
        } else {
          fprintf(f, "label=\"");
          // Visible but unnamed symbols are shown in single quotes.
          bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree);
          if (quoted) fprintf(f, "'");
          ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree));
          if (quoted) fprintf(f, "'");
          fprintf(f, "\"");
          fprintf(
            f,
            "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"",
            ts_subtree_error_cost(link.subtree),
            ts_subtree_dynamic_precedence(link.subtree)
          );
        }

        fprintf(f, "];\n");

        // The first link advances this iterator in place; every other link
        // gets a new iterator appended to the array.
        StackIterator *next_iterator;
        if (j == 0) {
          next_iterator = &self->iterators.contents[i];
        } else {
          array_push(&self->iterators, iterator);
          next_iterator = array_back(&self->iterators);
        }
        next_iterator->node = link.node;
      }

      array_push(&visited_nodes, node);
    }
  }

  fprintf(f, "}\n");

  array_delete(&visited_nodes);
  return true;
}
|
||||
|
||||
#undef forceinline
|
@ -0,0 +1,133 @@
|
||||
#ifndef TREE_SITTER_PARSE_STACK_H_
|
||||
#define TREE_SITTER_PARSE_STACK_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "./array.h"
|
||||
#include "./subtree.h"
|
||||
#include "./error_costs.h"
|
||||
#include <stdio.h>
|
||||
|
||||
typedef struct Stack Stack;
|
||||
|
||||
typedef unsigned StackVersion;
|
||||
#define STACK_VERSION_NONE ((StackVersion)-1)
|
||||
|
||||
typedef struct {
|
||||
SubtreeArray subtrees;
|
||||
StackVersion version;
|
||||
} StackSlice;
|
||||
typedef Array(StackSlice) StackSliceArray;
|
||||
|
||||
typedef struct {
|
||||
Length position;
|
||||
unsigned depth;
|
||||
TSStateId state;
|
||||
} StackSummaryEntry;
|
||||
typedef Array(StackSummaryEntry) StackSummary;
|
||||
|
||||
// Create a stack.
|
||||
Stack *ts_stack_new(SubtreePool *);
|
||||
|
||||
// Release the memory reserved for a given stack.
|
||||
void ts_stack_delete(Stack *);
|
||||
|
||||
// Get the stack's current number of versions.
|
||||
uint32_t ts_stack_version_count(const Stack *);
|
||||
|
||||
// Get the state at the top of the given version of the stack. If the stack is
|
||||
// empty, this returns the initial state, 0.
|
||||
TSStateId ts_stack_state(const Stack *, StackVersion);
|
||||
|
||||
// Get the last external token associated with a given version of the stack.
|
||||
Subtree ts_stack_last_external_token(const Stack *, StackVersion);
|
||||
|
||||
// Set the last external token associated with a given version of the stack.
|
||||
void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
|
||||
|
||||
// Get the position of the given version of the stack within the document.
|
||||
Length ts_stack_position(const Stack *, StackVersion);
|
||||
|
||||
// Push a tree and state onto the given version of the stack.
|
||||
//
|
||||
// This transfers ownership of the tree to the Stack. Callers that
|
||||
// need to retain ownership of the tree for their own purposes should
|
||||
// first retain the tree.
|
||||
void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);
|
||||
|
||||
// Pop the given number of entries from the given version of the stack. This
|
||||
// operation can increase the number of stack versions by revealing multiple
|
||||
// versions which had previously been merged. It returns an array that
|
||||
// specifies the index of each revealed version and the trees that were
|
||||
// removed from that version.
|
||||
StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
|
||||
|
||||
// Remove an error at the top of the given version of the stack.
|
||||
SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
|
||||
|
||||
// Remove any pending trees from the top of the given version of the stack.
|
||||
StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
|
||||
|
||||
// Remove all trees from the given version of the stack.
|
||||
StackSliceArray ts_stack_pop_all(Stack *, StackVersion);
|
||||
|
||||
// Get the maximum number of tree nodes reachable from this version of the stack
|
||||
// since the last error was detected.
|
||||
unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
|
||||
|
||||
int ts_stack_dynamic_precedence(Stack *, StackVersion);
|
||||
|
||||
bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
|
||||
|
||||
// Compute a summary of all the parse states near the top of the given
|
||||
// version of the stack and store the summary for later retrieval.
|
||||
void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);
|
||||
|
||||
// Retrieve a summary of all the parse states near the top of the
|
||||
// given version of the stack.
|
||||
StackSummary *ts_stack_get_summary(Stack *, StackVersion);
|
||||
|
||||
// Get the total cost of all errors on the given version of the stack.
|
||||
unsigned ts_stack_error_cost(const Stack *, StackVersion version);
|
||||
|
||||
// Merge the given two stack versions if possible, returning true
|
||||
// if they were successfully merged and false otherwise.
|
||||
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
|
||||
|
||||
// Determine whether the given two stack versions can be merged.
|
||||
bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
|
||||
|
||||
Subtree ts_stack_resume(Stack *, StackVersion);
|
||||
|
||||
void ts_stack_pause(Stack *, StackVersion, Subtree);
|
||||
|
||||
void ts_stack_halt(Stack *, StackVersion);
|
||||
|
||||
bool ts_stack_is_active(const Stack *, StackVersion);
|
||||
|
||||
bool ts_stack_is_paused(const Stack *, StackVersion);
|
||||
|
||||
bool ts_stack_is_halted(const Stack *, StackVersion);
|
||||
|
||||
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
|
||||
|
||||
void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);
|
||||
|
||||
StackVersion ts_stack_copy_version(Stack *, StackVersion);
|
||||
|
||||
// Remove the given version from the stack.
|
||||
void ts_stack_remove_version(Stack *, StackVersion);
|
||||
|
||||
void ts_stack_clear(Stack *);
|
||||
|
||||
bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);
|
||||
|
||||
typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_PARSE_STACK_H_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,382 @@
|
||||
#ifndef TREE_SITTER_SUBTREE_H_
|
||||
#define TREE_SITTER_SUBTREE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include "./length.h"
|
||||
#include "./array.h"
|
||||
#include "./error_costs.h"
|
||||
#include "./host.h"
|
||||
#include "tree_sitter/api.h"
|
||||
#include "./parser.h"
|
||||
|
||||
#define TS_TREE_STATE_NONE USHRT_MAX
|
||||
#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
|
||||
|
||||
// The serialized state of an external scanner.
|
||||
//
|
||||
// Every time an external token subtree is created after a call to an
|
||||
// external scanner, the scanner's `serialize` function is called to
|
||||
// retrieve a serialized copy of its state. The bytes are then copied
|
||||
// onto the subtree itself so that the scanner's state can later be
|
||||
// restored using its `deserialize` function.
|
||||
//
|
||||
// Small byte arrays are stored inline, and long ones are allocated
|
||||
// separately on the heap.
|
||||
typedef struct {
  // Storage for the serialized bytes: short states live in the inline
  // buffer, long ones behind a separately allocated heap pointer.
  union {
    char *long_data;
    char short_data[24];
  };
  // Number of serialized bytes.
  uint32_t length;
} ExternalScannerState;
|
||||
|
||||
// A compact representation of a subtree.
|
||||
//
|
||||
// This representation is used for small leaf nodes that are not
|
||||
// errors, and were not created by an external scanner.
|
||||
//
|
||||
// The idea behind the layout of this struct is that the `is_inline`
|
||||
// bit will fall exactly into the same location as the least significant
|
||||
// bit of the pointer in `Subtree` or `MutableSubtree`, respectively.
|
||||
// Because of alignment, for any valid pointer this will be 0, giving
|
||||
// us the opportunity to make use of this bit to signify whether to use
|
||||
// the pointer or the inline struct.
|
||||
typedef struct SubtreeInlineData SubtreeInlineData;
|
||||
|
||||
// Single-bit flags common to every layout of the inline representation.
#define SUBTREE_BITS    \
  bool visible : 1;     \
  bool named : 1;       \
  bool extra : 1;       \
  bool has_changes : 1; \
  bool is_missing : 1;  \
  bool is_keyword : 1;

// Size and padding fields common to every layout of the inline
// representation.
#define SUBTREE_SIZE           \
  uint8_t padding_columns;     \
  uint8_t padding_rows : 4;    \
  uint8_t lookahead_bytes : 4; \
  uint8_t padding_bytes;       \
  uint8_t size_bytes;

// The field order below differs per endianness and pointer size. Do not
// reorder fields: the position of `is_inline` within the struct matters.
#if TS_BIG_ENDIAN
#if TS_PTR_SIZE == 32

// Big-endian, 32-bit pointers.
struct SubtreeInlineData {
  uint16_t parse_state;
  uint8_t symbol;
  SUBTREE_BITS
  bool unused : 1;
  bool is_inline : 1;
  SUBTREE_SIZE
};

#else

// Big-endian, other pointer sizes.
struct SubtreeInlineData {
  SUBTREE_SIZE
  uint16_t parse_state;
  uint8_t symbol;
  SUBTREE_BITS
  bool unused : 1;
  bool is_inline : 1;
};

#endif
#else

// Little-endian.
struct SubtreeInlineData {
  bool is_inline : 1;
  SUBTREE_BITS
  uint8_t symbol;
  uint16_t parse_state;
  SUBTREE_SIZE
};

#endif

#undef SUBTREE_BITS
#undef SUBTREE_SIZE
|
||||
|
||||
// A heap-allocated representation of a subtree.
|
||||
//
|
||||
// This representation is used for parent nodes, external tokens,
|
||||
// errors, and other leaf nodes whose data is too large to fit into
|
||||
// the inline representation.
|
||||
typedef struct {
|
||||
volatile uint32_t ref_count;
|
||||
Length padding;
|
||||
Length size;
|
||||
uint32_t lookahead_bytes;
|
||||
uint32_t error_cost;
|
||||
uint32_t child_count;
|
||||
TSSymbol symbol;
|
||||
TSStateId parse_state;
|
||||
|
||||
bool visible : 1;
|
||||
bool named : 1;
|
||||
bool extra : 1;
|
||||
bool fragile_left : 1;
|
||||
bool fragile_right : 1;
|
||||
bool has_changes : 1;
|
||||
bool has_external_tokens : 1;
|
||||
bool has_external_scanner_state_change : 1;
|
||||
bool depends_on_column: 1;
|
||||
bool is_missing : 1;
|
||||
bool is_keyword : 1;
|
||||
|
||||
union {
|
||||
// Non-terminal subtrees (`child_count > 0`)
|
||||
struct {
|
||||
uint32_t visible_child_count;
|
||||
uint32_t named_child_count;
|
||||
uint32_t visible_descendant_count;
|
||||
int32_t dynamic_precedence;
|
||||
uint16_t repeat_depth;
|
||||
uint16_t production_id;
|
||||
struct {
|
||||
TSSymbol symbol;
|
||||
TSStateId parse_state;
|
||||
} first_leaf;
|
||||
};
|
||||
|
||||
// External terminal subtrees (`child_count == 0 && has_external_tokens`)
|
||||
ExternalScannerState external_scanner_state;
|
||||
|
||||
// Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
|
||||
int32_t lookahead_char;
|
||||
};
|
||||
} SubtreeHeapData;
|
||||
|
||||
// The fundamental building block of a syntax tree.
|
||||
typedef union {
|
||||
SubtreeInlineData data;
|
||||
const SubtreeHeapData *ptr;
|
||||
} Subtree;
|
||||
|
||||
// Like Subtree, but mutable.
|
||||
typedef union {
|
||||
SubtreeInlineData data;
|
||||
SubtreeHeapData *ptr;
|
||||
} MutableSubtree;
|
||||
|
||||
typedef Array(Subtree) SubtreeArray;
|
||||
typedef Array(MutableSubtree) MutableSubtreeArray;
|
||||
|
||||
typedef struct {
|
||||
MutableSubtreeArray free_trees;
|
||||
MutableSubtreeArray tree_stack;
|
||||
} SubtreePool;
|
||||
|
||||
void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
|
||||
const char *ts_external_scanner_state_data(const ExternalScannerState *);
|
||||
bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned);
|
||||
void ts_external_scanner_state_delete(ExternalScannerState *self);
|
||||
|
||||
void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
|
||||
void ts_subtree_array_clear(SubtreePool *, SubtreeArray *);
|
||||
void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
|
||||
void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
|
||||
void ts_subtree_array_reverse(SubtreeArray *);
|
||||
|
||||
SubtreePool ts_subtree_pool_new(uint32_t capacity);
|
||||
void ts_subtree_pool_delete(SubtreePool *);
|
||||
|
||||
// Subtree constructors (declarations only; parameters are unnamed here, so
// consult the definitions for the meaning of each positional argument).
Subtree ts_subtree_new_leaf(
|
||||
SubtreePool *, TSSymbol, Length, Length, uint32_t,
|
||||
TSStateId, bool, bool, bool, const TSLanguage *
|
||||
);
|
||||
Subtree ts_subtree_new_error(
|
||||
SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
|
||||
);
|
||||
MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
|
||||
Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
|
||||
Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *);
|
||||
MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
|
||||
// Retain/release pair: ts_tree_copy retains the root before sharing it and
// ts_tree_delete releases it.
void ts_subtree_retain(Subtree);
|
||||
void ts_subtree_release(SubtreePool *, Subtree);
|
||||
int ts_subtree_compare(Subtree, Subtree, SubtreePool *);
|
||||
void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
|
||||
void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *);
|
||||
void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *);
|
||||
void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
|
||||
// Returns the edited subtree; ts_tree_edit stores the result as the new root.
Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
|
||||
char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all);
|
||||
// Writes to the given stream; used by ts_tree_print_dot_graph.
void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
|
||||
Subtree ts_subtree_last_external_token(Subtree);
|
||||
const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self);
|
||||
bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
|
||||
|
||||
#define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name)
|
||||
|
||||
static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); }
|
||||
static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); }
|
||||
static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); }
|
||||
static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); }
|
||||
static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); }
|
||||
static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); }
|
||||
static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); }
|
||||
static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); }
|
||||
static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); }
|
||||
|
||||
#undef SUBTREE_GET
|
||||
|
||||
// Get the size needed to store a heap-allocated subtree with the given
|
||||
// number of children.
|
||||
static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
|
||||
return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData);
|
||||
}
|
||||
|
||||
// Get a subtree's children, which are allocated immediately before the
|
||||
// tree's own heap data.
// Evaluates to NULL for inline subtrees. `self` is expanded more than once,
// so the argument must not have side effects.
|
||||
#define ts_subtree_children(self) \
|
||||
((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
|
||||
|
||||
// Store the `extra` flag in whichever representation `self` currently uses.
static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
  if (!self->data.is_inline) {
    self->ptr->extra = is_extra;
    return;
  }
  self->data.extra = is_extra;
}
|
||||
|
||||
// Symbol of the subtree itself when it has no children, otherwise the symbol
// recorded in `first_leaf`.
static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
  if (self.data.is_inline) {
    return self.data.symbol;
  }
  const bool has_children = self.ptr->child_count != 0;
  return has_children ? self.ptr->first_leaf.symbol : self.ptr->symbol;
}
|
||||
|
||||
// Parse state of the subtree itself when it has no children, otherwise the
// parse state recorded in `first_leaf`.
static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
  if (self.data.is_inline) {
    return self.data.parse_state;
  }
  const bool has_children = self.ptr->child_count != 0;
  return has_children ? self.ptr->first_leaf.parse_state : self.ptr->parse_state;
}
|
||||
|
||||
// Padding of the subtree as a Length. Inline subtrees store it as three
// separate fields, heap subtrees store a Length directly.
static inline Length ts_subtree_padding(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->padding;
  }
  Length inline_padding = {
    self.data.padding_bytes,
    {self.data.padding_rows, self.data.padding_columns}
  };
  return inline_padding;
}
|
||||
|
||||
// Size of the subtree as a Length. For inline subtrees the row is 0 and the
// column equals the byte count.
static inline Length ts_subtree_size(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->size;
  }
  Length inline_size = {self.data.size_bytes, {0, self.data.size_bytes}};
  return inline_size;
}
|
||||
|
||||
// Padding plus size of the subtree.
static inline Length ts_subtree_total_size(Subtree self) {
  Length padding = ts_subtree_padding(self);
  Length size = ts_subtree_size(self);
  return length_add(padding, size);
}
|
||||
|
||||
// Byte component of the subtree's total size (padding plus size).
static inline uint32_t ts_subtree_total_bytes(Subtree self) {
  Length total = ts_subtree_total_size(self);
  return total.bytes;
}
|
||||
|
||||
// Number of children; inline subtrees never have any.
static inline uint32_t ts_subtree_child_count(Subtree self) {
  if (self.data.is_inline) {
    return 0;
  }
  return self.ptr->child_count;
}
|
||||
|
||||
// Stored repeat depth of a heap subtree; 0 for inline subtrees.
static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
  if (self.data.is_inline) {
    return 0;
  }
  return self.ptr->repeat_depth;
}
|
||||
|
||||
// Returns 1 for a heap subtree that is unnamed, invisible and has at least one
// child; returns 0 otherwise (including for every inline subtree).
static inline uint32_t ts_subtree_is_repetition(Subtree self) {
  if (self.data.is_inline) {
    return 0;
  }
  return !(self.ptr->named || self.ptr->visible) && self.ptr->child_count != 0;
}
|
||||
|
||||
// Stored visible-descendant count; 0 for inline subtrees and for heap
// subtrees without children.
static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) {
  if (self.data.is_inline) {
    return 0;
  }
  if (self.ptr->child_count == 0) {
    return 0;
  }
  return self.ptr->visible_descendant_count;
}
|
||||
|
||||
// Stored visible-child count, or 0 when the subtree has no children.
static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
  return ts_subtree_child_count(self) > 0 ? self.ptr->visible_child_count : 0;
}
|
||||
|
||||
// Error cost of the subtree. A missing subtree always costs
// ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; otherwise inline
// subtrees cost 0 and heap subtrees report their stored cost.
static inline uint32_t ts_subtree_error_cost(Subtree self) {
  if (ts_subtree_missing(self)) {
    return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
  }
  if (self.data.is_inline) {
    return 0;
  }
  return self.ptr->error_cost;
}
|
||||
|
||||
// Stored dynamic precedence; 0 for inline subtrees and for heap subtrees
// without children.
static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
  if (self.data.is_inline) {
    return 0;
  }
  if (self.ptr->child_count == 0) {
    return 0;
  }
  return self.ptr->dynamic_precedence;
}
|
||||
|
||||
// Stored production id, or 0 when the subtree has no children.
static inline uint16_t ts_subtree_production_id(Subtree self) {
  return ts_subtree_child_count(self) > 0 ? self.ptr->production_id : 0;
}
|
||||
|
||||
// `fragile_left` flag of a heap subtree; always false for inline subtrees.
static inline bool ts_subtree_fragile_left(Subtree self) {
  return !self.data.is_inline && self.ptr->fragile_left;
}
|
||||
|
||||
// `fragile_right` flag of a heap subtree; always false for inline subtrees.
static inline bool ts_subtree_fragile_right(Subtree self) {
  return !self.data.is_inline && self.ptr->fragile_right;
}
|
||||
|
||||
// `has_external_tokens` flag of a heap subtree; always false for inline
// subtrees.
static inline bool ts_subtree_has_external_tokens(Subtree self) {
  return !self.data.is_inline && self.ptr->has_external_tokens;
}
|
||||
|
||||
// `has_external_scanner_state_change` flag of a heap subtree; always false for
// inline subtrees.
static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) {
  return !self.data.is_inline && self.ptr->has_external_scanner_state_change;
}
|
||||
|
||||
// `depends_on_column` flag of a heap subtree; always false for inline
// subtrees.
static inline bool ts_subtree_depends_on_column(Subtree self) {
  return !self.data.is_inline && self.ptr->depends_on_column;
}
|
||||
|
||||
// True when a heap subtree has `fragile_left` or `fragile_right` set; always
// false for inline subtrees.
static inline bool ts_subtree_is_fragile(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->fragile_left || self.ptr->fragile_right;
}
|
||||
|
||||
// True when the subtree's symbol is `ts_builtin_sym_error`.
static inline bool ts_subtree_is_error(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_error;
}
|
||||
|
||||
// True when the subtree's symbol is `ts_builtin_sym_end`.
static inline bool ts_subtree_is_eof(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_end;
}
|
||||
|
||||
// Reinterpret a MutableSubtree as a Subtree by copying its `data` member.
static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
  Subtree immutable_view;
  immutable_view.data = self.data;
  return immutable_view;
}
|
||||
|
||||
// Reinterpret a Subtree as a MutableSubtree by copying its `data` member.
// No copy of the underlying node is made here.
static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
  MutableSubtree mutable_view;
  mutable_view.data = self.data;
  return mutable_view;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_SUBTREE_H_
|
@ -0,0 +1,165 @@
|
||||
#define _POSIX_C_SOURCE 200112L
|
||||
|
||||
#include "tree_sitter/api.h"
|
||||
#include "./array.h"
|
||||
#include "./get_changed_ranges.h"
|
||||
#include "./length.h"
|
||||
#include "./subtree.h"
|
||||
#include "./tree_cursor.h"
|
||||
#include "./tree.h"
|
||||
|
||||
TSTree *ts_tree_new(
|
||||
Subtree root, const TSLanguage *language,
|
||||
const TSRange *included_ranges, unsigned included_range_count
|
||||
) {
|
||||
TSTree *result = ts_malloc(sizeof(TSTree));
|
||||
result->root = root;
|
||||
result->language = ts_language_copy(language);
|
||||
result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
|
||||
memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
|
||||
result->included_range_count = included_range_count;
|
||||
return result;
|
||||
}
|
||||
|
||||
TSTree *ts_tree_copy(const TSTree *self) {
|
||||
ts_subtree_retain(self->root);
|
||||
return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count);
|
||||
}
|
||||
|
||||
// Release the root subtree, the language reference and the included ranges,
// then free the tree itself. A NULL tree is ignored.
void ts_tree_delete(TSTree *self) {
  if (self) {
    // A throwaway pool is needed only for the duration of the release.
    SubtreePool scratch_pool = ts_subtree_pool_new(0);
    ts_subtree_release(&scratch_pool, self->root);
    ts_subtree_pool_delete(&scratch_pool);

    ts_language_delete(self->language);
    ts_free(self->included_ranges);
    ts_free(self);
  }
}
|
||||
|
||||
TSNode ts_tree_root_node(const TSTree *self) {
|
||||
return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
|
||||
}
|
||||
|
||||
TSNode ts_tree_root_node_with_offset(
|
||||
const TSTree *self,
|
||||
uint32_t offset_bytes,
|
||||
TSPoint offset_extent
|
||||
) {
|
||||
Length offset = {offset_bytes, offset_extent};
|
||||
return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0);
|
||||
}
|
||||
|
||||
const TSLanguage *ts_tree_language(const TSTree *self) {
|
||||
return self->language;
|
||||
}
|
||||
|
||||
// Apply `edit` to the tree: first adjust every included range so that it
// matches the edited text, then edit the root subtree.
//
// For each range boundary (end, then start):
//  - a boundary at or after the old end of the edit is shifted by the edit's
//    size change, and clamped to UINT32_MAX / POINT_MAX if the byte offset
//    wraps around;
//  - a boundary strictly inside the edited span is moved to the edit start;
//  - a boundary at or before the edit start is left alone.
// An end boundary already at UINT32_MAX is never shifted.
void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
  for (unsigned range_index = 0; range_index < self->included_range_count; range_index++) {
    TSRange *current = &self->included_ranges[range_index];

    // End boundary.
    if (current->end_byte < edit->old_end_byte) {
      if (current->end_byte > edit->start_byte) {
        current->end_byte = edit->start_byte;
        current->end_point = edit->start_point;
      }
    } else if (current->end_byte != UINT32_MAX) {
      current->end_byte = edit->new_end_byte + (current->end_byte - edit->old_end_byte);
      current->end_point = point_add(
        edit->new_end_point,
        point_sub(current->end_point, edit->old_end_point)
      );
      // The unsigned addition wrapped around.
      if (current->end_byte < edit->new_end_byte) {
        current->end_byte = UINT32_MAX;
        current->end_point = POINT_MAX;
      }
    }

    // Start boundary.
    if (current->start_byte < edit->old_end_byte) {
      if (current->start_byte > edit->start_byte) {
        current->start_byte = edit->start_byte;
        current->start_point = edit->start_point;
      }
    } else {
      current->start_byte = edit->new_end_byte + (current->start_byte - edit->old_end_byte);
      current->start_point = point_add(
        edit->new_end_point,
        point_sub(current->start_point, edit->old_end_point)
      );
      // The unsigned addition wrapped around.
      if (current->start_byte < edit->new_end_byte) {
        current->start_byte = UINT32_MAX;
        current->start_point = POINT_MAX;
      }
    }
  }

  SubtreePool scratch_pool = ts_subtree_pool_new(0);
  self->root = ts_subtree_edit(self->root, edit, &scratch_pool);
  ts_subtree_pool_delete(&scratch_pool);
}
|
||||
|
||||
TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
|
||||
*length = self->included_range_count;
|
||||
TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange));
|
||||
memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange));
|
||||
return ranges;
|
||||
}
|
||||
|
||||
TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) {
|
||||
TreeCursor cursor1 = {NULL, array_new(), 0};
|
||||
TreeCursor cursor2 = {NULL, array_new(), 0};
|
||||
ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree));
|
||||
ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree));
|
||||
|
||||
TSRangeArray included_range_differences = array_new();
|
||||
ts_range_array_get_changed_ranges(
|
||||
old_tree->included_ranges, old_tree->included_range_count,
|
||||
new_tree->included_ranges, new_tree->included_range_count,
|
||||
&included_range_differences
|
||||
);
|
||||
|
||||
TSRange *result;
|
||||
*length = ts_subtree_get_changed_ranges(
|
||||
&old_tree->root, &new_tree->root, &cursor1, &cursor2,
|
||||
old_tree->language, &included_range_differences, &result
|
||||
);
|
||||
|
||||
array_delete(&included_range_differences);
|
||||
array_delete(&cursor1.stack);
|
||||
array_delete(&cursor2.stack);
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include <io.h>
|
||||
#include <windows.h>
|
||||
|
||||
// Duplicate a Windows HANDLE and wrap the duplicate in a C runtime file
// descriptor. Returns -1 when either step fails.
int _ts_dup(HANDLE handle) {
  HANDLE dup_handle;
  if (!DuplicateHandle(
    GetCurrentProcess(), handle,
    GetCurrentProcess(), &dup_handle,
    0, FALSE, DUPLICATE_SAME_ACCESS
  )) return -1;

  int fd = _open_osfhandle((intptr_t)dup_handle, 0);
  if (fd == -1) {
    // No descriptor took ownership of the duplicate, so close it here to
    // avoid leaking the handle.
    CloseHandle(dup_handle);
  }
  return fd;
}
|
||||
|
||||
void ts_tree_print_dot_graph(const TSTree *self, int fd) {
|
||||
FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a");
|
||||
ts_subtree_print_dot_graph(self->root, self->language, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
// Duplicate a file descriptor with dup(); returns whatever dup() returns.
int _ts_dup(int file_descriptor) {
  int duplicate = dup(file_descriptor);
  return duplicate;
}
|
||||
|
||||
void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
|
||||
FILE *file = fdopen(_ts_dup(file_descriptor), "a");
|
||||
ts_subtree_print_dot_graph(self->root, self->language, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
#endif
|
@ -0,0 +1,31 @@
|
||||
#ifndef TREE_SITTER_TREE_H_
|
||||
#define TREE_SITTER_TREE_H_
|
||||
|
||||
#include "./subtree.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Record associating a child subtree with its parent subtree, a position and
// an alias symbol.
// NOTE(review): no user of this type is visible in this part of the file --
// confirm whether it is still needed.
typedef struct {
|
||||
const Subtree *child;
|
||||
const Subtree *parent;
|
||||
Length position;
|
||||
TSSymbol alias_symbol;
|
||||
} ParentCacheEntry;
|
||||
|
||||
// A syntax tree: the root subtree, the language it was created with, and the
// array of included ranges together with its element count.
struct TSTree {
|
||||
Subtree root;
|
||||
const TSLanguage *language;
|
||||
TSRange *included_ranges;
|
||||
unsigned included_range_count;
|
||||
};
|
||||
|
||||
// Create a tree from a root subtree, a language, and an array of included
// ranges with its element count.
TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned);
|
||||
// Create a node for a subtree of the given tree. NOTE(review): the Length is
// presumably the node's start position and the TSSymbol its alias -- confirm
// against the definition.
TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_TREE_H_
|
@ -0,0 +1,714 @@
|
||||
#include "tree_sitter/api.h"
|
||||
#include "./alloc.h"
|
||||
#include "./tree_cursor.h"
|
||||
#include "./language.h"
|
||||
#include "./tree.h"
|
||||
|
||||
// Iteration state over the children of one parent subtree.
// Field order matters: ts_tree_cursor_iterate_children builds the empty
// iterator with a positional initializer.
typedef struct {
|
||||
Subtree parent;
|
||||
const TSTree *tree;
|
||||
Length position;
|
||||
uint32_t child_index;
|
||||
uint32_t structural_child_index;
|
||||
uint32_t descendant_index;
|
||||
const TSSymbol *alias_sequence;
|
||||
} CursorChildIterator;
|
||||
|
||||
// CursorChildIterator
|
||||
|
||||
static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) {
|
||||
TreeCursorEntry *entry = &self->stack.contents[index];
|
||||
if (index == 0 || ts_subtree_visible(*entry->subtree)) {
|
||||
return true;
|
||||
} else if (!ts_subtree_extra(*entry->subtree)) {
|
||||
TreeCursorEntry *parent_entry = &self->stack.contents[index - 1];
|
||||
return ts_language_alias_at(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id,
|
||||
entry->structural_child_index
|
||||
);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
|
||||
TreeCursorEntry *last_entry = array_back(&self->stack);
|
||||
if (ts_subtree_child_count(*last_entry->subtree) == 0) {
|
||||
return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL};
|
||||
}
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
self->tree->language,
|
||||
last_entry->subtree->ptr->production_id
|
||||
);
|
||||
|
||||
uint32_t descendant_index = last_entry->descendant_index;
|
||||
if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) {
|
||||
descendant_index += 1;
|
||||
}
|
||||
|
||||
return (CursorChildIterator) {
|
||||
.tree = self->tree,
|
||||
.parent = *last_entry->subtree,
|
||||
.position = last_entry->position,
|
||||
.child_index = 0,
|
||||
.structural_child_index = 0,
|
||||
.descendant_index = descendant_index,
|
||||
.alias_sequence = alias_sequence,
|
||||
};
|
||||
}
|
||||
|
||||
static inline bool ts_tree_cursor_child_iterator_next(
|
||||
CursorChildIterator *self,
|
||||
TreeCursorEntry *result,
|
||||
bool *visible
|
||||
) {
|
||||
if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
|
||||
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
|
||||
*result = (TreeCursorEntry) {
|
||||
.subtree = child,
|
||||
.position = self->position,
|
||||
.child_index = self->child_index,
|
||||
.structural_child_index = self->structural_child_index,
|
||||
.descendant_index = self->descendant_index,
|
||||
};
|
||||
*visible = ts_subtree_visible(*child);
|
||||
bool extra = ts_subtree_extra(*child);
|
||||
if (!extra) {
|
||||
if (self->alias_sequence) {
|
||||
*visible |= self->alias_sequence[self->structural_child_index];
|
||||
}
|
||||
self->structural_child_index++;
|
||||
}
|
||||
|
||||
self->descendant_index += ts_subtree_visible_descendant_count(*child);
|
||||
if (*visible) {
|
||||
self->descendant_index += 1;
|
||||
}
|
||||
|
||||
self->position = length_add(self->position, ts_subtree_size(*child));
|
||||
self->child_index++;
|
||||
|
||||
if (self->child_index < self->parent.ptr->child_count) {
|
||||
Subtree next_child = ts_subtree_children(self->parent)[self->child_index];
|
||||
self->position = length_add(self->position, ts_subtree_padding(next_child));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Return a position that, when `b` is added to it, yields `a`. This
|
||||
// can only be computed if `b` has zero rows. Otherwise, this function
|
||||
// returns `LENGTH_UNDEFINED`, and the caller needs to recompute
|
||||
// the position some other way.
|
||||
static inline Length length_backtrack(Length a, Length b) {
|
||||
if (length_is_undefined(a) || b.extent.row != 0) {
|
||||
return LENGTH_UNDEFINED;
|
||||
}
|
||||
|
||||
Length result;
|
||||
result.bytes = a.bytes - b.bytes;
|
||||
result.extent.row = a.extent.row;
|
||||
result.extent.column = a.extent.column - b.extent.column;
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline bool ts_tree_cursor_child_iterator_previous(
|
||||
CursorChildIterator *self,
|
||||
TreeCursorEntry *result,
|
||||
bool *visible
|
||||
) {
|
||||
// this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into
|
||||
// account unsigned underflow
|
||||
if (!self->parent.ptr || (int8_t)self->child_index == -1) return false;
|
||||
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
|
||||
*result = (TreeCursorEntry) {
|
||||
.subtree = child,
|
||||
.position = self->position,
|
||||
.child_index = self->child_index,
|
||||
.structural_child_index = self->structural_child_index,
|
||||
};
|
||||
*visible = ts_subtree_visible(*child);
|
||||
bool extra = ts_subtree_extra(*child);
|
||||
if (!extra && self->alias_sequence) {
|
||||
*visible |= self->alias_sequence[self->structural_child_index];
|
||||
self->structural_child_index--;
|
||||
}
|
||||
|
||||
self->position = length_backtrack(self->position, ts_subtree_padding(*child));
|
||||
self->child_index--;
|
||||
|
||||
// unsigned can underflow so compare it to child_count
|
||||
if (self->child_index < self->parent.ptr->child_count) {
|
||||
Subtree previous_child = ts_subtree_children(self->parent)[self->child_index];
|
||||
Length size = ts_subtree_size(previous_child);
|
||||
self->position = length_backtrack(self->position, size);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// TSTreeCursor - lifecycle
|
||||
|
||||
// Create a cursor positioned on `node`. The cursor starts zeroed and is then
// initialized through its internal TreeCursor view.
TSTreeCursor ts_tree_cursor_new(TSNode node) {
  TSTreeCursor cursor = {NULL, NULL, {0, 0, 0}};
  ts_tree_cursor_init((TreeCursor *)&cursor, node);
  return cursor;
}
|
||||
|
||||
// Re-position an existing cursor on `node`.
void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
  TreeCursor *self = (TreeCursor *)_self;
  ts_tree_cursor_init(self, node);
}
|
||||
|
||||
// Point the cursor at `node`: remember the node's tree and alias symbol
// (taken from `node.context[3]`), clear the stack, and push a single entry
// for the node at its start position.
void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
  TreeCursorEntry root_entry = {
    .subtree = (const Subtree *)node.id,
    .position = {
      ts_node_start_byte(node),
      ts_node_start_point(node)
    },
    .child_index = 0,
    .structural_child_index = 0,
    .descendant_index = 0,
  };

  self->tree = node.tree;
  self->root_alias_symbol = node.context[3];
  array_clear(&self->stack);
  array_push(&self->stack, root_entry);
}
|
||||
|
||||
// Free the cursor's entry stack.
void ts_tree_cursor_delete(TSTreeCursor *_self) {
  TreeCursor *cursor = (TreeCursor *)_self;
  array_delete(&cursor->stack);
}
|
||||
|
||||
// TSTreeCursor - walking the tree
|
||||
|
||||
// Push the first child of the current entry that is either visible
// (TreeCursorStepVisible) or hidden but with visible children
// (TreeCursorStepHidden). Returns TreeCursorStepNone, leaving the stack
// untouched, when there is no such child.
TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
  TreeCursorEntry child;
  bool is_visible;

  while (ts_tree_cursor_child_iterator_next(&iterator, &child, &is_visible)) {
    TreeCursorStep step = TreeCursorStepNone;
    if (is_visible) {
      step = TreeCursorStepVisible;
    } else if (ts_subtree_visible_child_count(*child.subtree) > 0) {
      step = TreeCursorStepHidden;
    }

    if (step != TreeCursorStepNone) {
      array_push(&self->stack, child);
      return step;
    }
  }

  return TreeCursorStepNone;
}
|
||||
|
||||
// Move to the first visible child, descending through hidden nodes as needed.
// Returns true when a visible node was reached.
bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) {
  TreeCursorStep step;
  do {
    step = ts_tree_cursor_goto_first_child_internal(self);
  } while (step == TreeCursorStepHidden);
  return step == TreeCursorStepVisible;
}
|
||||
|
||||
// Push the last child of the current entry that is either visible
// (TreeCursorStepVisible) or hidden but with visible children
// (TreeCursorStepHidden). Children are scanned front to back and the last
// match wins. Returns TreeCursorStepNone when there is no such child.
TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
  if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) {
    return TreeCursorStepNone;
  }

  TreeCursorEntry candidate = {0};
  TreeCursorStep candidate_step = TreeCursorStepNone;
  TreeCursorEntry entry;
  bool visible;
  while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
    if (visible || ts_subtree_visible_child_count(*entry.subtree) > 0) {
      candidate = entry;
      candidate_step = visible ? TreeCursorStepVisible : TreeCursorStepHidden;
    }
  }

  if (!candidate.subtree) {
    return TreeCursorStepNone;
  }
  array_push(&self->stack, candidate);
  return candidate_step;
}
|
||||
|
||||
// Move to the last visible child, descending through hidden nodes as needed.
// Returns true when a visible node was reached.
bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) {
  TreeCursorStep step;
  do {
    step = ts_tree_cursor_goto_last_child_internal(self);
  } while (step == TreeCursorStepHidden);
  return step == TreeCursorStepVisible;
}
|
||||
|
||||
static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point(
|
||||
TSTreeCursor *_self,
|
||||
uint32_t goal_byte,
|
||||
TSPoint goal_point
|
||||
) {
|
||||
TreeCursor *self = (TreeCursor *)_self;
|
||||
uint32_t initial_size = self->stack.size;
|
||||
uint32_t visible_child_index = 0;
|
||||
|
||||
bool did_descend;
|
||||
do {
|
||||
did_descend = false;
|
||||
|
||||
bool visible;
|
||||
TreeCursorEntry entry;
|
||||
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
|
||||
while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
|
||||
Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree));
|
||||
bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point);
|
||||
uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree);
|
||||
if (at_goal) {
|
||||
if (visible) {
|
||||
array_push(&self->stack, entry);
|
||||
return visible_child_index;
|
||||
}
|
||||
if (visible_child_count > 0) {
|
||||
array_push(&self->stack, entry);
|
||||
did_descend = true;
|
||||
break;
|
||||
}
|
||||
} else if (visible) {
|
||||
visible_child_index++;
|
||||
} else {
|
||||
visible_child_index += visible_child_count;
|
||||
}
|
||||
}
|
||||
} while (did_descend);
|
||||
|
||||
self->stack.size = initial_size;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Byte-only variant: the point goal is POINT_ZERO.
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) {
  TSPoint no_point_goal = POINT_ZERO;
  return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, no_point_goal);
}
|
||||
|
||||
// Point-only variant: the byte goal is 0.
int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) {
  const uint32_t no_byte_goal = 0;
  return ts_tree_cursor_goto_first_child_for_byte_and_point(self, no_byte_goal, goal_point);
}
|
||||
|
||||
TreeCursorStep ts_tree_cursor_goto_sibling_internal(
|
||||
TSTreeCursor *_self,
|
||||
bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) {
|
||||
TreeCursor *self = (TreeCursor *)_self;
|
||||
uint32_t initial_size = self->stack.size;
|
||||
|
||||
while (self->stack.size > 1) {
|
||||
TreeCursorEntry entry = array_pop(&self->stack);
|
||||
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
|
||||
iterator.child_index = entry.child_index;
|
||||
iterator.structural_child_index = entry.structural_child_index;
|
||||
iterator.position = entry.position;
|
||||
iterator.descendant_index = entry.descendant_index;
|
||||
|
||||
bool visible = false;
|
||||
advance(&iterator, &entry, &visible);
|
||||
if (visible && self->stack.size + 1 < initial_size) break;
|
||||
|
||||
while (advance(&iterator, &entry, &visible)) {
|
||||
if (visible) {
|
||||
array_push(&self->stack, entry);
|
||||
return TreeCursorStepVisible;
|
||||
}
|
||||
|
||||
if (ts_subtree_visible_child_count(*entry.subtree)) {
|
||||
array_push(&self->stack, entry);
|
||||
return TreeCursorStepHidden;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self->stack.size = initial_size;
|
||||
return TreeCursorStepNone;
|
||||
}
|
||||
|
||||
// Sibling walk in the forward direction.
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
  TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
    _self,
    ts_tree_cursor_child_iterator_next
  );
  return step;
}
|
||||
|
||||
// Move to the next visible sibling. When the walk lands on a hidden node, the
// cursor continues to that node's first visible child. Returns false when
// there is no next sibling.
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
  TreeCursorStep step = ts_tree_cursor_goto_next_sibling_internal(self);
  if (step == TreeCursorStepHidden) {
    ts_tree_cursor_goto_first_child(self);
    return true;
  }
  return step == TreeCursorStepVisible;
}
|
||||
|
||||
// Sibling walk in the backward direction. Walking backwards computes
// positions by subtraction, which yields an undefined position whenever a
// subtracted length spans rows. In that case the new entry's position is
// rebuilt by summing forward from the parent's position.
TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;

  TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
    _self, ts_tree_cursor_child_iterator_previous);
  if (step == TreeCursorStepNone) {
    return step;
  }

  // Nothing to repair when the backward walk produced a usable position.
  TreeCursorEntry *current = array_back(&self->stack);
  if (!length_is_undefined(current->position)) {
    return step;
  }

  const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2];
  const Subtree *children = ts_subtree_children((*(parent->subtree)));
  const uint32_t child_index = current->child_index;
  Length position = parent->position;

  if (child_index > 0) {
    // The parent's position already accounts for the first child's padding,
    // so only that child's size is added.
    position = length_add(position, ts_subtree_size(children[0]));
    uint32_t i = 1;
    while (i < child_index) {
      position = length_add(position, ts_subtree_total_size(children[i]));
      i++;
    }
    position = length_add(position, ts_subtree_padding(children[child_index]));
  }

  current->position = position;
  return step;
}
|
||||
|
||||
// Move to the previous visible sibling. When the walk lands on a hidden node,
// the cursor continues to that node's last visible child. Returns false when
// there is no previous sibling.
bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) {
  TreeCursorStep step = ts_tree_cursor_goto_previous_sibling_internal(self);
  if (step == TreeCursorStepHidden) {
    ts_tree_cursor_goto_last_child(self);
    return true;
  }
  return step == TreeCursorStepVisible;
}
|
||||
|
||||
// Move to the nearest visible ancestor by truncating the stack to it. Returns
// false, leaving the stack unchanged, when the cursor is already at the
// bottom entry.
bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  // `depth` is the index of the entry most recently considered; ancestors are
  // the entries below the top one.
  unsigned depth = self->stack.size - 1;
  while (depth > 0) {
    depth--;
    if (ts_tree_cursor_is_entry_visible(self, depth)) {
      self->stack.size = depth + 1;
      return true;
    }
  }
  return false;
}
|
||||
|
||||
void ts_tree_cursor_goto_descendant(
|
||||
TSTreeCursor *_self,
|
||||
uint32_t goal_descendant_index
|
||||
) {
|
||||
TreeCursor *self = (TreeCursor *)_self;
|
||||
|
||||
// Ascend to the lowest ancestor that contains the goal node.
|
||||
for (;;) {
|
||||
uint32_t i = self->stack.size - 1;
|
||||
TreeCursorEntry *entry = &self->stack.contents[i];
|
||||
uint32_t next_descendant_index =
|
||||
entry->descendant_index +
|
||||
(ts_tree_cursor_is_entry_visible(self, i) ? 1 : 0) +
|
||||
ts_subtree_visible_descendant_count(*entry->subtree);
|
||||
if (
|
||||
(entry->descendant_index <= goal_descendant_index) &&
|
||||
(next_descendant_index > goal_descendant_index)
|
||||
) {
|
||||
break;
|
||||
} else if (self->stack.size <= 1) {
|
||||
return;
|
||||
} else {
|
||||
self->stack.size--;
|
||||
}
|
||||
}
|
||||
|
||||
// Descend to the goal node.
|
||||
bool did_descend = true;
|
||||
do {
|
||||
did_descend = false;
|
||||
bool visible;
|
||||
TreeCursorEntry entry;
|
||||
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
|
||||
if (iterator.descendant_index > goal_descendant_index) {
|
||||
return;
|
||||
}
|
||||
|
||||
while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
|
||||
if (iterator.descendant_index > goal_descendant_index) {
|
||||
array_push(&self->stack, entry);
|
||||
if (visible && entry.descendant_index == goal_descendant_index) {
|
||||
return;
|
||||
} else {
|
||||
did_descend = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (did_descend);
|
||||
}
|
||||
|
||||
uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
TreeCursorEntry *last_entry = array_back(&self->stack);
|
||||
return last_entry->descendant_index;
|
||||
}
|
||||
|
||||
TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
TreeCursorEntry *last_entry = array_back(&self->stack);
|
||||
TSSymbol alias_symbol = self->root_alias_symbol;
|
||||
if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) {
|
||||
TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
|
||||
alias_symbol = ts_language_alias_at(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id,
|
||||
last_entry->structural_child_index
|
||||
);
|
||||
}
|
||||
return ts_node_new(
|
||||
self->tree,
|
||||
last_entry->subtree,
|
||||
last_entry->position,
|
||||
alias_symbol
|
||||
);
|
||||
}
|
||||
|
||||
// Private - Get various facts about the current node that are needed
|
||||
// when executing tree queries.
|
||||
void ts_tree_cursor_current_status(
|
||||
const TSTreeCursor *_self,
|
||||
TSFieldId *field_id,
|
||||
bool *has_later_siblings,
|
||||
bool *has_later_named_siblings,
|
||||
bool *can_have_later_siblings_with_this_field,
|
||||
TSSymbol *supertypes,
|
||||
unsigned *supertype_count
|
||||
) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
unsigned max_supertypes = *supertype_count;
|
||||
*field_id = 0;
|
||||
*supertype_count = 0;
|
||||
*has_later_siblings = false;
|
||||
*has_later_named_siblings = false;
|
||||
*can_have_later_siblings_with_this_field = false;
|
||||
|
||||
// Walk up the tree, visiting the current node and its invisible ancestors,
|
||||
// because fields can refer to nodes through invisible *wrapper* nodes,
|
||||
for (unsigned i = self->stack.size - 1; i > 0; i--) {
|
||||
TreeCursorEntry *entry = &self->stack.contents[i];
|
||||
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
|
||||
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id
|
||||
);
|
||||
|
||||
#define subtree_symbol(subtree, structural_child_index) \
|
||||
(( \
|
||||
!ts_subtree_extra(subtree) && \
|
||||
alias_sequence && \
|
||||
alias_sequence[structural_child_index] \
|
||||
) ? \
|
||||
alias_sequence[structural_child_index] : \
|
||||
ts_subtree_symbol(subtree))
|
||||
|
||||
// Stop walking up when a visible ancestor is found.
|
||||
TSSymbol entry_symbol = subtree_symbol(
|
||||
*entry->subtree,
|
||||
entry->structural_child_index
|
||||
);
|
||||
TSSymbolMetadata entry_metadata = ts_language_symbol_metadata(
|
||||
self->tree->language,
|
||||
entry_symbol
|
||||
);
|
||||
if (i != self->stack.size - 1 && entry_metadata.visible) break;
|
||||
|
||||
// Record any supertypes
|
||||
if (entry_metadata.supertype && *supertype_count < max_supertypes) {
|
||||
supertypes[*supertype_count] = entry_symbol;
|
||||
(*supertype_count)++;
|
||||
}
|
||||
|
||||
// Determine if the current node has later siblings.
|
||||
if (!*has_later_siblings) {
|
||||
unsigned sibling_count = parent_entry->subtree->ptr->child_count;
|
||||
unsigned structural_child_index = entry->structural_child_index;
|
||||
if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
|
||||
for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
|
||||
Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j];
|
||||
TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
|
||||
self->tree->language,
|
||||
subtree_symbol(sibling, structural_child_index)
|
||||
);
|
||||
if (sibling_metadata.visible) {
|
||||
*has_later_siblings = true;
|
||||
if (*has_later_named_siblings) break;
|
||||
if (sibling_metadata.named) {
|
||||
*has_later_named_siblings = true;
|
||||
break;
|
||||
}
|
||||
} else if (ts_subtree_visible_child_count(sibling) > 0) {
|
||||
*has_later_siblings = true;
|
||||
if (*has_later_named_siblings) break;
|
||||
if (sibling.ptr->named_child_count > 0) {
|
||||
*has_later_named_siblings = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!ts_subtree_extra(sibling)) structural_child_index++;
|
||||
}
|
||||
}
|
||||
|
||||
#undef subtree_symbol
|
||||
|
||||
if (!ts_subtree_extra(*entry->subtree)) {
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
ts_language_field_map(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id,
|
||||
&field_map, &field_map_end
|
||||
);
|
||||
|
||||
// Look for a field name associated with the current node.
|
||||
if (!*field_id) {
|
||||
for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
|
||||
if (!map->inherited && map->child_index == entry->structural_child_index) {
|
||||
*field_id = map->field_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine if the current node can have later siblings with the same field name.
|
||||
if (*field_id) {
|
||||
for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
|
||||
if (
|
||||
map->field_id == *field_id &&
|
||||
map->child_index > entry->structural_child_index
|
||||
) {
|
||||
*can_have_later_siblings_with_this_field = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
uint32_t depth = 0;
|
||||
for (unsigned i = 1; i < self->stack.size; i++) {
|
||||
if (ts_tree_cursor_is_entry_visible(self, i)) {
|
||||
depth++;
|
||||
}
|
||||
}
|
||||
return depth;
|
||||
}
|
||||
|
||||
TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
for (int i = (int)self->stack.size - 2; i >= 0; i--) {
|
||||
TreeCursorEntry *entry = &self->stack.contents[i];
|
||||
bool is_visible = true;
|
||||
TSSymbol alias_symbol = 0;
|
||||
if (i > 0) {
|
||||
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
|
||||
alias_symbol = ts_language_alias_at(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id,
|
||||
entry->structural_child_index
|
||||
);
|
||||
is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree);
|
||||
}
|
||||
if (is_visible) {
|
||||
return ts_node_new(
|
||||
self->tree,
|
||||
entry->subtree,
|
||||
entry->position,
|
||||
alias_symbol
|
||||
);
|
||||
}
|
||||
}
|
||||
return ts_node_new(NULL, NULL, length_zero(), 0);
|
||||
}
|
||||
|
||||
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
|
||||
// Walk up the tree, visiting the current node and its invisible ancestors.
|
||||
for (unsigned i = self->stack.size - 1; i > 0; i--) {
|
||||
TreeCursorEntry *entry = &self->stack.contents[i];
|
||||
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
|
||||
|
||||
// Stop walking up when another visible node is found.
|
||||
if (
|
||||
i != self->stack.size - 1 &&
|
||||
ts_tree_cursor_is_entry_visible(self, i)
|
||||
) break;
|
||||
|
||||
if (ts_subtree_extra(*entry->subtree)) break;
|
||||
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
ts_language_field_map(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id,
|
||||
&field_map, &field_map_end
|
||||
);
|
||||
for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
|
||||
if (!map->inherited && map->child_index == entry->structural_child_index) {
|
||||
return map->field_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
|
||||
TSFieldId id = ts_tree_cursor_current_field_id(_self);
|
||||
if (id) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
return self->tree->language->field_names[id];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
|
||||
const TreeCursor *cursor = (const TreeCursor *)_cursor;
|
||||
TSTreeCursor res = {NULL, NULL, {0, 0}};
|
||||
TreeCursor *copy = (TreeCursor *)&res;
|
||||
copy->tree = cursor->tree;
|
||||
copy->root_alias_symbol = cursor->root_alias_symbol;
|
||||
array_init(©->stack);
|
||||
array_push_all(©->stack, &cursor->stack);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Make the existing cursor `_dst` refer to the same tree, root alias and
// position as `_src`.
//
// The destination's stack is cleared and then every entry of the source stack
// is appended to it.
void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) {
  const TreeCursor *cursor = (const TreeCursor *)_src;
  TreeCursor *copy = (TreeCursor *)_dst;
  copy->tree = cursor->tree;
  copy->root_alias_symbol = cursor->root_alias_symbol;
  array_clear(&copy->stack);
  array_push_all(&copy->stack, &cursor->stack);
}
|
@ -0,0 +1,48 @@
|
||||
#ifndef TREE_SITTER_TREE_CURSOR_H_
|
||||
#define TREE_SITTER_TREE_CURSOR_H_
|
||||
|
||||
#include "./subtree.h"
|
||||
|
||||
typedef struct {
|
||||
const Subtree *subtree;
|
||||
Length position;
|
||||
uint32_t child_index;
|
||||
uint32_t structural_child_index;
|
||||
uint32_t descendant_index;
|
||||
} TreeCursorEntry;
|
||||
|
||||
typedef struct {
|
||||
const TSTree *tree;
|
||||
Array(TreeCursorEntry) stack;
|
||||
TSSymbol root_alias_symbol;
|
||||
} TreeCursor;
|
||||
|
||||
// Result of the internal cursor stepping functions declared in this header.
// NOTE(review): presumably "no step was taken", "stepped onto a hidden node"
// and "stepped onto a visible node" respectively - confirm against the
// implementations.
typedef enum {
  TreeCursorStepNone,
  TreeCursorStepHidden,
  TreeCursorStepVisible,
} TreeCursorStep;
|
||||
|
||||
void ts_tree_cursor_init(TreeCursor *, TSNode);
|
||||
void ts_tree_cursor_current_status(
|
||||
const TSTreeCursor *,
|
||||
TSFieldId *,
|
||||
bool *,
|
||||
bool *,
|
||||
bool *,
|
||||
TSSymbol *,
|
||||
unsigned *
|
||||
);
|
||||
|
||||
TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *);
|
||||
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *);
|
||||
|
||||
static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
TreeCursorEntry *last_entry = array_back(&self->stack);
|
||||
return *last_entry->subtree;
|
||||
}
|
||||
|
||||
TSNode ts_tree_cursor_parent_node(const TSTreeCursor *);
|
||||
|
||||
#endif // TREE_SITTER_TREE_CURSOR_H_
|
@ -0,0 +1,50 @@
|
||||
#ifndef TREE_SITTER_UNICODE_H_
|
||||
#define TREE_SITTER_UNICODE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define U_EXPORT
|
||||
#define U_EXPORT2
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
// Named alias for ICU's U_SENTINEL.
// NOTE(review): presumably the value the decoders below store in *code_point
// when the input is malformed - confirm against the ICU macros.
static const int32_t TS_DECODE_ERROR = U_SENTINEL;
|
||||
|
||||
// Signature shared by the decoding functions in this header: read one Unicode
// code point from `string`, store it in `*code_point`, and return the number
// of bytes consumed.
typedef uint32_t (*UnicodeDecodeFunction)(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
);
|
||||
|
||||
// Decode one code point from UTF-8 input using ICU's U8_NEXT.
// Stores the decoded value in `*code_point` and returns the number of bytes
// that U8_NEXT advanced over.
static inline uint32_t ts_decode_utf8(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t consumed = 0;
  U8_NEXT(string, consumed, length, *code_point);
  return consumed;
}
|
||||
|
||||
// Decode one code point from UTF-16 input using ICU's U16_NEXT.
// Stores the decoded value in `*code_point` and returns the number of bytes
// consumed.
// NOTE(review): `length` is forwarded to U16_NEXT unchanged; confirm whether
// callers pass it in bytes or in 16-bit units.
static inline uint32_t ts_decode_utf16(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t i = 0;
  // The input is only read, so keep the const qualifier when viewing the
  // bytes as 16-bit code units.
  U16_NEXT(((const uint16_t *)string), i, length, *code_point);
  // `i` counts 16-bit units; convert to bytes.
  return i * 2;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_UNICODE_H_
|
@ -0,0 +1 @@
|
||||
552b01f61127d30d6589aa4bf99468224979b661
|
@ -0,0 +1,414 @@
|
||||
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
|
||||
|
||||
Copyright © 1991-2019 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that either
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software, or
|
||||
(b) this copyright and permission notice appear in associated
|
||||
Documentation.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
|
||||
---------------------
|
||||
|
||||
Third-Party Software Licenses
|
||||
|
||||
This section contains third-party software notices and/or additional
|
||||
terms for licensed third-party software components included within ICU
|
||||
libraries.
|
||||
|
||||
1. ICU License - ICU 1.8.1 to ICU 57.1
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright (c) 1995-2016 International Business Machines Corporation and others
|
||||
All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, and/or sell copies of the Software, and to permit persons
|
||||
to whom the Software is furnished to do so, provided that the above
|
||||
copyright notice(s) and this permission notice appear in all copies of
|
||||
the Software and that both the above copyright notice(s) and this
|
||||
permission notice appear in supporting documentation.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||
OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
|
||||
SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
|
||||
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
|
||||
CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale, use
|
||||
or other dealings in this Software without prior written authorization
|
||||
of the copyright holder.
|
||||
|
||||
All trademarks and registered trademarks mentioned herein are the
|
||||
property of their respective owners.
|
||||
|
||||
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
|
||||
|
||||
# The Google Chrome software developed by Google is licensed under
|
||||
# the BSD license. Other software included in this distribution is
|
||||
# provided under other licenses, as set forth below.
|
||||
#
|
||||
# The BSD License
|
||||
# http://opensource.org/licenses/bsd-license.php
|
||||
# Copyright (C) 2006-2008, Google Inc.
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# Redistributions in binary form must reproduce the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided with
|
||||
# the distribution.
|
||||
# Neither the name of Google Inc. nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
# The word list in cjdict.txt are generated by combining three word lists
|
||||
# listed below with further processing for compound word breaking. The
|
||||
# frequency is generated with an iterative training against Google web
|
||||
# corpora.
|
||||
#
|
||||
# * Libtabe (Chinese)
|
||||
# - https://sourceforge.net/project/?group_id=1519
|
||||
# - Its license terms and conditions are shown below.
|
||||
#
|
||||
# * IPADIC (Japanese)
|
||||
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
|
||||
# - Its license terms and conditions are shown below.
|
||||
#
|
||||
# ---------COPYING.libtabe ---- BEGIN--------------------
|
||||
#
|
||||
# /*
|
||||
# * Copyright (c) 1999 TaBE Project.
|
||||
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
|
||||
# * All rights reserved.
|
||||
# *
|
||||
# * Redistribution and use in source and binary forms, with or without
|
||||
# * modification, are permitted provided that the following conditions
|
||||
# * are met:
|
||||
# *
|
||||
# * . Redistributions of source code must retain the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer.
|
||||
# * . Redistributions in binary form must reproduce the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer in
|
||||
# * the documentation and/or other materials provided with the
|
||||
# * distribution.
|
||||
# * . Neither the name of the TaBE Project nor the names of its
|
||||
# * contributors may be used to endorse or promote products derived
|
||||
# * from this software without specific prior written permission.
|
||||
# *
|
||||
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# */
|
||||
#
|
||||
# /*
|
||||
# * Copyright (c) 1999 Computer Systems and Communication Lab,
|
||||
# * Institute of Information Science, Academia
|
||||
# * Sinica. All rights reserved.
|
||||
# *
|
||||
# * Redistribution and use in source and binary forms, with or without
|
||||
# * modification, are permitted provided that the following conditions
|
||||
# * are met:
|
||||
# *
|
||||
# * . Redistributions of source code must retain the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer.
|
||||
# * . Redistributions in binary form must reproduce the above copyright
|
||||
# * notice, this list of conditions and the following disclaimer in
|
||||
# * the documentation and/or other materials provided with the
|
||||
# * distribution.
|
||||
# * . Neither the name of the Computer Systems and Communication Lab
|
||||
# * nor the names of its contributors may be used to endorse or
|
||||
# * promote products derived from this software without specific
|
||||
# * prior written permission.
|
||||
# *
|
||||
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# */
|
||||
#
|
||||
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
|
||||
# University of Illinois
|
||||
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
|
||||
#
|
||||
# ---------------COPYING.libtabe-----END--------------------------------
|
||||
#
|
||||
#
|
||||
# ---------------COPYING.ipadic-----BEGIN-------------------------------
|
||||
#
|
||||
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
|
||||
# and Technology. All Rights Reserved.
|
||||
#
|
||||
# Use, reproduction, and distribution of this software is permitted.
|
||||
# Any copy of this software, whether in its original form or modified,
|
||||
# must include both the above copyright notice and the following
|
||||
# paragraphs.
|
||||
#
|
||||
# Nara Institute of Science and Technology (NAIST),
|
||||
# the copyright holders, disclaims all warranties with regard to this
|
||||
# software, including all implied warranties of merchantability and
|
||||
# fitness, in no event shall NAIST be liable for
|
||||
# any special, indirect or consequential damages or any damages
|
||||
# whatsoever resulting from loss of use, data or profits, whether in an
|
||||
# action of contract, negligence or other tortuous action, arising out
|
||||
# of or in connection with the use or performance of this software.
|
||||
#
|
||||
# A large portion of the dictionary entries
|
||||
# originate from ICOT Free Software. The following conditions for ICOT
|
||||
# Free Software applies to the current dictionary as well.
|
||||
#
|
||||
# Each User may also freely distribute the Program, whether in its
|
||||
# original form or modified, to any third party or parties, PROVIDED
|
||||
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
|
||||
# on, or be attached to, the Program, which is distributed substantially
|
||||
# in the same form as set out herein and that such intended
|
||||
# distribution, if actually made, will neither violate or otherwise
|
||||
# contravene any of the laws and regulations of the countries having
|
||||
# jurisdiction over the User or the intended distribution itself.
|
||||
#
|
||||
# NO WARRANTY
|
||||
#
|
||||
# The program was produced on an experimental basis in the course of the
|
||||
# research and development conducted during the project and is provided
|
||||
# to users as so produced on an experimental basis. Accordingly, the
|
||||
# program is provided without any warranty whatsoever, whether express,
|
||||
# implied, statutory or otherwise. The term "warranty" used herein
|
||||
# includes, but is not limited to, any warranty of the quality,
|
||||
# performance, merchantability and fitness for a particular purpose of
|
||||
# the program and the nonexistence of any infringement or violation of
|
||||
# any right of any third party.
|
||||
#
|
||||
# Each user of the program will agree and understand, and be deemed to
|
||||
# have agreed and understood, that there is no warranty whatsoever for
|
||||
# the program and, accordingly, the entire risk arising from or
|
||||
# otherwise connected with the program is assumed by the user.
|
||||
#
|
||||
# Therefore, neither ICOT, the copyright holder, or any other
|
||||
# organization that participated in or was otherwise related to the
|
||||
# development of the program and their respective officials, directors,
|
||||
# officers and other employees shall be held liable for any and all
|
||||
# damages, including, without limitation, general, special, incidental
|
||||
# and consequential damages, arising out of or otherwise in connection
|
||||
# with the use or inability to use the program or any product, material
|
||||
# or result produced or otherwise obtained by using the program,
|
||||
# regardless of whether they have been advised of, or otherwise had
|
||||
# knowledge of, the possibility of such damages at any time during the
|
||||
# project or thereafter. Each user will be deemed to have agreed to the
|
||||
# foregoing by his or her commencement of use of the program. The term
|
||||
# "use" as used herein includes, but is not limited to, the use,
|
||||
# modification, copying and distribution of the program and the
|
||||
# production of secondary products from the program.
|
||||
#
|
||||
# In the case where the program, whether in its original form or
|
||||
# modified, was distributed or delivered to or received by a user from
|
||||
# any person, organization or entity other than ICOT, unless it makes or
|
||||
# grants independently of ICOT any specific warranty to the user in
|
||||
# writing, such person, organization or entity, will also be exempted
|
||||
# from and not be held liable to the user for any such damages as noted
|
||||
# above as far as the program is concerned.
|
||||
#
|
||||
# ---------------COPYING.ipadic-----END----------------------------------
|
||||
|
||||
3. Lao Word Break Dictionary Data (laodict.txt)
|
||||
|
||||
# Copyright (c) 2013 International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
#
|
||||
# Project: http://code.google.com/p/lao-dictionary/
|
||||
# Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
|
||||
# License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
|
||||
# (copied below)
|
||||
#
|
||||
# This file is derived from the above dictionary, with slight
|
||||
# modifications.
|
||||
# ----------------------------------------------------------------------
|
||||
# Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification,
|
||||
# are permitted provided that the following conditions are met:
|
||||
#
|
||||
#
|
||||
# Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer. Redistributions in
|
||||
# binary form must reproduce the above copyright notice, this list of
|
||||
# conditions and the following disclaimer in the documentation and/or
|
||||
# other materials provided with the distribution.
|
||||
#
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
# OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
4. Burmese Word Break Dictionary Data (burmesedict.txt)
|
||||
|
||||
# Copyright (c) 2014 International Business Machines Corporation
|
||||
# and others. All Rights Reserved.
|
||||
#
|
||||
# This list is part of a project hosted at:
|
||||
# github.com/kanyawtech/myanmar-karen-word-lists
|
||||
#
|
||||
# --------------------------------------------------------------------------
|
||||
# Copyright (c) 2013, LeRoy Benjamin Sharon
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met: Redistributions of source code must retain the above
|
||||
# copyright notice, this list of conditions and the following
|
||||
# disclaimer. Redistributions in binary form must reproduce the
|
||||
# above copyright notice, this list of conditions and the following
|
||||
# disclaimer in the documentation and/or other materials provided
|
||||
# with the distribution.
|
||||
#
|
||||
# Neither the name Myanmar Karen Word Lists, nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
|
||||
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
5. Time Zone Database
|
||||
|
||||
ICU uses the public domain data and code derived from Time Zone
|
||||
Database for its time zone support. The ownership of the TZ database
|
||||
is explained in BCP 175: Procedure for Maintaining the Time Zone
|
||||
Database section 7.
|
||||
|
||||
# 7. Database Ownership
|
||||
#
|
||||
# The TZ database itself is not an IETF Contribution or an IETF
|
||||
# document. Rather it is a pre-existing and regularly updated work
|
||||
# that is in the public domain, and is intended to remain in the
|
||||
# public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
|
||||
# not apply to the TZ Database or contributions that individuals make
|
||||
# to it. Should any claims be made and substantiated against the TZ
|
||||
# Database, the organization that is providing the IANA
|
||||
# Considerations defined in this RFC, under the memorandum of
|
||||
# understanding with the IETF, currently ICANN, may act in accordance
|
||||
# with all competent court orders. No ownership claims will be made
|
||||
# by ICANN or the IETF Trust on the database or the code. Any person
|
||||
# making a contribution to the database or code waives all rights to
|
||||
# future claims in that contribution or in the TZ Database.
|
||||
|
||||
6. Google double-conversion
|
||||
|
||||
Copyright 2006-2011, the V8 project authors. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@ -0,0 +1,29 @@
|
||||
# ICU Parts
|
||||
|
||||
This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu).
|
||||
|
||||
### License
|
||||
|
||||
The license for these files is contained in the `LICENSE` file within this directory.
|
||||
|
||||
### Contents
|
||||
|
||||
* Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory:
|
||||
* `utf8.h`
|
||||
* `utf16.h`
|
||||
* `umachine.h`
|
||||
* Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed:
|
||||
* `ptypes.h`
|
||||
* `urename.h`
|
||||
* `utf.h`
|
||||
* `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained.
|
||||
* `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository.
|
||||
* `README.md` - This text file.
|
||||
|
||||
### Updating ICU
|
||||
|
||||
To incorporate changes from the upstream `icu` repository:
|
||||
|
||||
* Update `ICU_SHA` with the new Git SHA.
|
||||
* Update `LICENSE` with the license text from the directory mentioned above.
|
||||
* Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository.
|
@ -0,0 +1 @@
|
||||
// This file must exist in order for `utf8.h` and `utf16.h` to be used.
|
@ -0,0 +1,448 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
******************************************************************************
|
||||
* file name: umachine.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This file defines basic types and constants for ICU to be
|
||||
* platform-independent. umachine.h and utf.h are included into
|
||||
* utypes.h to provide all the general definitions for ICU.
|
||||
* All of these definitions used to be in utypes.h before
|
||||
* the UTF-handling macros made this unmaintainable.
|
||||
*/
|
||||
|
||||
#ifndef __UMACHINE_H__
|
||||
#define __UMACHINE_H__
|
||||
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Basic types and constants for UTF
|
||||
*
|
||||
* <h2> Basic types and constants for UTF </h2>
|
||||
* This file defines basic types and constants for utf.h to be
|
||||
* platform-independent. umachine.h and utf.h are included into
|
||||
* utypes.h to provide all the general definitions for ICU.
|
||||
* All of these definitions used to be in utypes.h before
|
||||
* the UTF-handling macros made this unmaintainable.
|
||||
*
|
||||
*/
|
||||
/*==========================================================================*/
|
||||
/* Include platform-dependent definitions */
|
||||
/* which are contained in the platform-specific file platform.h */
|
||||
/*==========================================================================*/
|
||||
|
||||
#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
|
||||
|
||||
/*
|
||||
* ANSI C headers:
|
||||
* stddef.h defines wchar_t
|
||||
*/
|
||||
#include <stddef.h>
|
||||
|
||||
/*==========================================================================*/
|
||||
/* For C wrappers, we use the symbol U_STABLE. */
|
||||
/* This works properly if the includer is C or C++. */
|
||||
/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */
|
||||
/*==========================================================================*/
|
||||
|
||||
/**
|
||||
* \def U_CFUNC
|
||||
* This is used in a declaration of a library private ICU C function.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_CDECL_BEGIN
|
||||
* This is used to begin a declaration of a library private ICU C API.
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
/**
|
||||
* \def U_CDECL_END
|
||||
* This is used to end a declaration of a library private ICU C API
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
# define U_CFUNC extern "C"
|
||||
# define U_CDECL_BEGIN extern "C" {
|
||||
# define U_CDECL_END }
|
||||
#else
|
||||
# define U_CFUNC extern
|
||||
# define U_CDECL_BEGIN
|
||||
# define U_CDECL_END
|
||||
#endif
|
||||
|
||||
#ifndef U_ATTRIBUTE_DEPRECATED
|
||||
/**
|
||||
* \def U_ATTRIBUTE_DEPRECATED
|
||||
* This is used for GCC specific attributes
|
||||
* @internal
|
||||
*/
|
||||
#if U_GCC_MAJOR_MINOR >= 302
|
||||
# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
|
||||
/**
|
||||
* \def U_ATTRIBUTE_DEPRECATED
|
||||
* This is used for Visual C++ specific attributes
|
||||
* @internal
|
||||
*/
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||
# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
|
||||
#else
|
||||
# define U_ATTRIBUTE_DEPRECATED
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
|
||||
#define U_CAPI U_CFUNC U_EXPORT
|
||||
/** This is used to declare a function as a stable public ICU C API*/
|
||||
#define U_STABLE U_CAPI
|
||||
/** This is used to declare a function as a draft public ICU C API */
|
||||
#define U_DRAFT U_CAPI
|
||||
/** This is used to declare a function as a deprecated public ICU C API */
|
||||
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
|
||||
/** This is used to declare a function as an obsolete public ICU C API */
|
||||
#define U_OBSOLETE U_CAPI
|
||||
/** This is used to declare a function as an internal ICU C API */
|
||||
#define U_INTERNAL U_CAPI
|
||||
|
||||
/**
|
||||
* \def U_OVERRIDE
|
||||
* Defined to the C++11 "override" keyword if available.
|
||||
* Denotes a class or member which is an override of the base class.
|
||||
* May result in an error if it is applied to something that is not an override.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef U_OVERRIDE
|
||||
#define U_OVERRIDE override
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_FINAL
|
||||
* Defined to the C++11 "final" keyword if available.
|
||||
* Denotes a class or member which may not be overridden in subclasses.
|
||||
* May result in an error if subclasses attempt to override.
|
||||
* @internal
|
||||
*/
|
||||
#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
|
||||
#define U_FINAL final
|
||||
#endif
|
||||
|
||||
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
|
||||
// series of statements wrapped in { } blocks and the caller could choose to
|
||||
// either treat them as if they were actual functions and end the invocation
|
||||
// with a trailing ; creating an empty statement after the block or else omit
|
||||
// this trailing ; using the knowledge that the macro would expand to { }.
|
||||
//
|
||||
// But doing so doesn't work well with macros that look like functions and
|
||||
// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
|
||||
// switches to the standard solution of wrapping such macros in do { } while.
|
||||
//
|
||||
// This will however break existing code that depends on being able to invoke
|
||||
// these macros without a trailing ; so to be able to remain compatible with
|
||||
// such code the wrapper is itself defined as macros so that it's possible to
|
||||
// build ICU 65 and later with the old macro behaviour, like this:
|
||||
//
|
||||
// CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
|
||||
// runConfigureICU ...
|
||||
|
||||
/**
|
||||
* \def UPRV_BLOCK_MACRO_BEGIN
|
||||
* Defined as the "do" keyword by default.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UPRV_BLOCK_MACRO_BEGIN
|
||||
#define UPRV_BLOCK_MACRO_BEGIN do
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UPRV_BLOCK_MACRO_END
|
||||
* Defined as "while (FALSE)" by default.
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UPRV_BLOCK_MACRO_END
|
||||
#define UPRV_BLOCK_MACRO_END while (FALSE)
|
||||
#endif
|
||||
|
||||
/*==========================================================================*/
|
||||
/* limits for int32_t etc., like in POSIX inttypes.h */
|
||||
/*==========================================================================*/
|
||||
|
||||
#ifndef INT8_MIN
|
||||
/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT8_MIN ((int8_t)(-128))
|
||||
#endif
|
||||
#ifndef INT16_MIN
|
||||
/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT16_MIN ((int16_t)(-32767-1))
|
||||
#endif
|
||||
#ifndef INT32_MIN
|
||||
/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT32_MIN ((int32_t)(-2147483647-1))
|
||||
#endif
|
||||
|
||||
#ifndef INT8_MAX
|
||||
/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT8_MAX ((int8_t)(127))
|
||||
#endif
|
||||
#ifndef INT16_MAX
|
||||
/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT16_MAX ((int16_t)(32767))
|
||||
#endif
|
||||
#ifndef INT32_MAX
|
||||
/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
|
||||
# define INT32_MAX ((int32_t)(2147483647))
|
||||
#endif
|
||||
|
||||
#ifndef UINT8_MAX
|
||||
/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT8_MAX ((uint8_t)(255U))
|
||||
#endif
|
||||
#ifndef UINT16_MAX
|
||||
/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT16_MAX ((uint16_t)(65535U))
|
||||
#endif
|
||||
#ifndef UINT32_MAX
|
||||
/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
|
||||
# define UINT32_MAX ((uint32_t)(4294967295U))
|
||||
#endif
|
||||
|
||||
#if defined(U_INT64_T_UNAVAILABLE)
|
||||
# error int64_t is required for decimal format and rule-based number format.
|
||||
#else
|
||||
# ifndef INT64_C
|
||||
/**
|
||||
* Provides a platform independent way to specify a signed 64-bit integer constant.
|
||||
* note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
# define INT64_C(c) c ## LL
|
||||
# endif
|
||||
# ifndef UINT64_C
|
||||
/**
|
||||
* Provides a platform independent way to specify an unsigned 64-bit integer constant.
|
||||
* note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
# define UINT64_C(c) c ## ULL
|
||||
# endif
|
||||
# ifndef U_INT64_MIN
|
||||
/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
|
||||
# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
|
||||
# endif
|
||||
# ifndef U_INT64_MAX
|
||||
/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
|
||||
# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
|
||||
# endif
|
||||
# ifndef U_UINT64_MAX
|
||||
/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
|
||||
# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Boolean data type */
|
||||
/*==========================================================================*/
|
||||
|
||||
/** The ICU boolean type @stable ICU 2.0 */
|
||||
typedef int8_t UBool;
|
||||
|
||||
#ifndef TRUE
|
||||
/** The TRUE value of a UBool @stable ICU 2.0 */
|
||||
# define TRUE 1
|
||||
#endif
|
||||
#ifndef FALSE
|
||||
/** The FALSE value of a UBool @stable ICU 2.0 */
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Unicode data types */
|
||||
/*==========================================================================*/
|
||||
|
||||
/* wchar_t-related definitions -------------------------------------------- */
|
||||
|
||||
/*
|
||||
* \def U_WCHAR_IS_UTF16
|
||||
* Defined if wchar_t uses UTF-16.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
/*
|
||||
* \def U_WCHAR_IS_UTF32
|
||||
* Defined if wchar_t uses UTF-32.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
|
||||
# ifdef __STDC_ISO_10646__
|
||||
# if (U_SIZEOF_WCHAR_T==2)
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# elif (U_SIZEOF_WCHAR_T==4)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# endif
|
||||
# elif defined __UCS2__
|
||||
# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# endif
|
||||
# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
|
||||
# if (U_SIZEOF_WCHAR_T==4)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# endif
|
||||
# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
|
||||
# define U_WCHAR_IS_UTF32
|
||||
# elif U_PLATFORM_HAS_WIN32_API
|
||||
# define U_WCHAR_IS_UTF16
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* UChar and UChar32 definitions -------------------------------------------- */
|
||||
|
||||
/** Number of bytes in a UChar. @stable ICU 2.0 */
|
||||
#define U_SIZEOF_UCHAR 2
|
||||
|
||||
/**
|
||||
* \def U_CHAR16_IS_TYPEDEF
|
||||
* If 1, then char16_t is a typedef and not a real type (yet)
|
||||
* @internal
|
||||
*/
|
||||
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
|
||||
// for AIX, uchar.h needs to be included
|
||||
# include <uchar.h>
|
||||
# define U_CHAR16_IS_TYPEDEF 1
|
||||
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
|
||||
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
|
||||
# define U_CHAR16_IS_TYPEDEF 1
|
||||
#else
|
||||
# define U_CHAR16_IS_TYPEDEF 0
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* \var UChar
|
||||
*
|
||||
* The base type for UTF-16 code units and pointers.
|
||||
* Unsigned 16-bit integer.
|
||||
* Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
|
||||
*
|
||||
* UChar is configurable by defining the macro UCHAR_TYPE
|
||||
* on the preprocessor or compiler command line:
|
||||
* -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
|
||||
* (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
|
||||
* This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
|
||||
*
|
||||
* The default is UChar=char16_t.
|
||||
*
|
||||
* C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
|
||||
*
|
||||
* In C, char16_t is a simple typedef of uint_least16_t.
|
||||
* ICU requires uint_least16_t=uint16_t for data memory mapping.
|
||||
* On macOS, char16_t is not available because the uchar.h standard header is missing.
|
||||
*
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
|
||||
#if 1
|
||||
// #if 1 is normal. UChar defaults to char16_t in C++.
|
||||
// For configuration testing of UChar=uint16_t temporarily change this to #if 0.
|
||||
// The intltest Makefile #defines UCHAR_TYPE=char16_t,
|
||||
// so we only #define it to uint16_t if it is undefined so far.
|
||||
#elif !defined(UCHAR_TYPE)
|
||||
# define UCHAR_TYPE uint16_t
|
||||
#endif
|
||||
|
||||
#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
|
||||
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
|
||||
// Inside the ICU library code, never configurable.
|
||||
typedef char16_t UChar;
|
||||
#elif defined(UCHAR_TYPE)
|
||||
typedef UCHAR_TYPE UChar;
|
||||
#elif defined(__cplusplus)
|
||||
typedef char16_t UChar;
|
||||
#else
|
||||
typedef uint16_t UChar;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \var OldUChar
|
||||
* Default ICU 58 definition of UChar.
|
||||
* A base type for UTF-16 code units and pointers.
|
||||
* Unsigned 16-bit integer.
|
||||
*
|
||||
* Define OldUChar to be wchar_t if that is 16 bits wide.
|
||||
* If wchar_t is not 16 bits wide, then define OldUChar to be uint16_t.
|
||||
*
|
||||
* This makes the definition of OldUChar platform-dependent
|
||||
* but allows direct string type compatibility with platforms with
|
||||
* 16-bit wchar_t types.
|
||||
*
|
||||
* This is how UChar was defined in ICU 58, for transition convenience.
|
||||
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
|
||||
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
|
||||
*
|
||||
* @stable ICU 59
|
||||
*/
|
||||
#if U_SIZEOF_WCHAR_T==2
|
||||
typedef wchar_t OldUChar;
|
||||
#elif defined(__CHAR16_TYPE__)
|
||||
typedef __CHAR16_TYPE__ OldUChar;
|
||||
#else
|
||||
typedef uint16_t OldUChar;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Define UChar32 as a type for single Unicode code points.
|
||||
* UChar32 is a signed 32-bit integer (same as int32_t).
|
||||
*
|
||||
* The Unicode code point range is 0..0x10ffff.
|
||||
* All other values (negative or >=0x110000) are illegal as Unicode code points.
|
||||
* They may be used as sentinel values to indicate "done", "error"
|
||||
* or similar non-code point conditions.
|
||||
*
|
||||
* Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
|
||||
* to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
|
||||
* or else to be uint32_t.
|
||||
* That is, the definition of UChar32 was platform-dependent.
|
||||
*
|
||||
* @see U_SENTINEL
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
typedef int32_t UChar32;
|
||||
|
||||
/**
|
||||
* This value is intended for sentinel values for APIs that
|
||||
* (take or) return single code points (UChar32).
|
||||
* It is outside of the Unicode code point range 0..0x10ffff.
|
||||
*
|
||||
* For example, a "done" or "error" value in a new API
|
||||
* could be indicated with U_SENTINEL.
|
||||
*
|
||||
* ICU APIs designed before ICU 2.4 usually define service-specific "done"
|
||||
* values, mostly 0xffff.
|
||||
* Those may need to be distinguished from
|
||||
* actual U+ffff text contents by calling functions like
|
||||
* CharacterIterator::hasNext() or UnicodeString::length().
|
||||
*
|
||||
* @return -1
|
||||
* @see UChar32
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_SENTINEL (-1)
|
||||
|
||||
#include "unicode/urename.h"
|
||||
|
||||
#endif
|
@ -0,0 +1 @@
|
||||
// This file must exist in order for `utf8.h` and `utf16.h` to be used.
|
@ -0,0 +1 @@
|
||||
// This file must exist in order for `utf8.h` and `utf16.h` to be used.
|
@ -0,0 +1,733 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2012, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf16.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep09
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: 16-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (http://userguide.icu-project.org/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.
|
||||
*/
|
||||
|
||||
#ifndef __UTF16_H__
|
||||
#define __UTF16_H__
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
|
||||
/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 *
 * The expansion is wrapped in parentheses so that the logical negation
 * always applies to the surrogate test as a unit, whatever expression the
 * macro invocation is embedded in.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
|
||||
|
||||
/**
 * Tests whether a code unit is a lead surrogate (U+d800..U+dbff).
 *
 * Clearing the low 10 bits of any value in that range leaves exactly 0xd800;
 * bits above the low 16 are kept by the mask, so larger values never match.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (0xd800 == ((c) & 0xfffffc00))
|
||||
|
||||
/**
 * Tests whether a code unit is a trail surrogate (U+dc00..U+dfff).
 *
 * Clearing the low 10 bits of any value in that range leaves exactly 0xdc00;
 * bits above the low 16 are kept by the mask, so larger values never match.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (0xdc00 == ((c) & 0xfffffc00))
|
||||
|
||||
/**
 * Tests whether a code unit is any surrogate, lead or trail (U+d800..U+dfff).
 *
 * This is the UTF-16 spelling of U_IS_SURROGATE and simply forwards to it.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
|
||||
|
||||
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is the lead half of a pair.
 *
 * Only bit 0x400 is inspected: it is clear in d800..dbff and set in
 * dc00..dfff. The answer is meaningless for non-surrogate input.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (!((c) & 0x400))
|
||||
|
||||
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is the trail half of a pair.
 *
 * Only bit 0x400 is inspected: it is set in dc00..dfff and clear in
 * d800..dbff. The answer is meaningless for non-surrogate input.
 *
 * @param c 16-bit code unit
 * @return TRUE or FALSE
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0 != ((c) & 0x400))
|
||||
|
||||
/**
 * Helper constant for U16_GET_SUPPLEMENTARY.
 *
 * It is the amount subtracted from (lead<<10)+trail so that the smallest
 * pair, lead 0xd800 with trail 0xdc00, yields the code point 0x10000.
 *
 * @internal
 */
#define U16_SURROGATE_OFFSET (0xdc00 - 0x10000 + (0xd800 << 10UL))
|
||||
|
||||
/**
 * Combine a lead and a trail surrogate into the supplementary code point
 * (U+10000..U+10ffff) that the pair encodes.
 *
 * Neither argument is validated: if they are not a lead and a trail
 * surrogate respectively, the result is undefined.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead) << 10UL) + (UChar32)(trail) - U16_SURROGATE_OFFSET)
|
||||
|
||||
|
||||
/**
 * Compute the lead surrogate (0xd800..0xdbff) of a supplementary code point
 * (0x10000..0x10ffff).
 *
 * The bias 0xd7c0 equals 0xd800 - (0x10000>>10), so the 0x10000 offset of
 * the supplementary range is removed together with the shift.
 *
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) (UChar)(0xd7c0 + ((supplementary) >> 10))
|
||||
|
||||
/**
 * Compute the trail surrogate (0xdc00..0xdfff) of a supplementary code point
 * (0x10000..0x10ffff).
 *
 * The trail unit carries the low 10 bits of the code point on top of the
 * 0xdc00 base.
 *
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) (UChar)(0xdc00 | ((supplementary) & 0x3ff))
|
||||
|
||||
/**
 * Number of 16-bit code units (1 or 2) needed to encode a Unicode code point.
 *
 * Anything that does not fit in 16 bits after conversion to uint32_t is
 * counted as 2. The result is not defined if c is not a Unicode code point
 * (U+0000..U+10ffff).
 *
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) (0xffff < (uint32_t)(c) ? 2 : 1)
|
||||
|
||||
/**
 * Upper bound on the number of 16-bit code units used to encode one Unicode
 * code point (U+0000..U+10ffff): a surrogate pair.
 *
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
|
||||
|
||||
/**
 * Random-access read of the code point at offset i; the offset itself is
 * left untouched.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * i may address either half of a surrogate pair: for a lead unit the
 * following unit is read as well, for a trail unit the preceding one.
 * No bounds or pairing checks are made, so the result is undefined if the
 * offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[i]; \
    if (U16_IS_SURROGATE(c)) { \
        /* Pick the neighbour on the side that completes the pair. */ \
        (c) = U16_IS_SURROGATE_LEAD(c) \
            ? U16_GET_SUPPLEMENTARY((c), (s)[(i) + 1]) \
            : U16_GET_SUPPLEMENTARY((s)[(i) - 1], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Random-access read of the code point at offset i; the offset itself is
 * left untouched.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * i may address either half of a surrogate pair, in which case the adjacent
 * matching surrogate is read as well: the unit after i (only if i+1 is not
 * equal to length) for a lead unit, the unit before i (only if i>start) for
 * a trail unit.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to that unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[i]; \
    if (U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if (U16_IS_SURROGATE_TRAIL(c)) { \
            /* Trail unit: join it with a lead unit directly before it. */ \
            if ((i) > (start) && U16_IS_LEAD(__c2 = (s)[(i) - 1])) { \
                (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } else if ((i) + 1 != (length) && U16_IS_TRAIL(__c2 = (s)[(i) + 1])) { \
            /* Lead unit: join it with a trail unit directly after it. */ \
            (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Random-access read of the code point at offset i; the offset itself is
 * left untouched.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * i may address either half of a surrogate pair, in which case the adjacent
 * matching surrogate is read as well: the unit after i (only if i+1 is not
 * equal to length) for a lead unit, the unit before i (only if i>start) for
 * a trail unit.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[i]; \
    if (U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if (U16_IS_SURROGATE_LEAD(c) && \
                (i) + 1 != (length) && U16_IS_TRAIL(__c2 = (s)[(i) + 1])) { \
            /* Lead unit with a trail unit directly after it. */ \
            (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
        } else if (U16_IS_SURROGATE_TRAIL(c) && \
                (i) > (start) && U16_IS_LEAD(__c2 = (s)[(i) - 1])) { \
            /* Trail unit with a lead unit directly before it. */ \
            (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } else { \
            /* Unpaired surrogate on either side. */ \
            (c) = 0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with forward iteration --------------------------------------- */
|
||||
|
||||
/**
 * Read the code point that starts at the code point boundary offset i and
 * advance i to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * If i addresses a lead surrogate, the following unit is consumed as its
 * trail surrogate without being checked. If i addresses a trail surrogate,
 * that unit itself is returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[(i)++]; \
    if (U16_IS_LEAD(c)) { \
        /* Consume the next unit unconditionally as the trail surrogate. */ \
        (c) = U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
 * Read the code point that starts at the code point boundary offset i and
 * advance i to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If i addresses a lead surrogate and the next unit lies before length and
 * is a trail surrogate, both units are consumed and combined.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to that unpaired
 * surrogate and only that one unit is consumed.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c) = (s)[(i)++]; \
    if (U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        /* Peek at the next unit; consume it only if it completes the pair. */ \
        if ((i) != (length) && U16_IS_TRAIL(__c2 = (s)[(i)])) { \
            ++(i); \
            (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* The offset may point to the lead surrogate unit
|
||||
* for a supplementary code point, in which case the macro will read
|
||||
* the following trail surrogate as well.
|
||||
* If the offset points to a trail surrogate or
|
||||
* to a single, unpaired lead surrogate, then c is set to U+FFFD.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_NEXT_UNSAFE
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; /* read one code unit and step past it */ \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        /* only a lead followed by a trail inside the string forms a code point */ \
        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } else { \
            (c)=0xfffd; /* unpaired surrogate: substitute U+FFFD */ \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Append a code point to a string, overwriting 1 or 2 code units.
|
||||
* The offset points to the current end of the string contents
|
||||
* and is advanced (post-increment).
|
||||
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
|
||||
* Otherwise, the result is undefined.
|
||||
*
|
||||
* @param s UChar * string buffer (written to, so it must not be const)
|
||||
* @param i string offset
|
||||
* @param c code point to append
|
||||
* @see U16_APPEND
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); /* fits in a single code unit */ \
    } else { \
        /* 0xd7c0 is 0xd800-(0x10000>>10), so (c>>10)+0xd7c0 is the lead surrogate */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); /* trail: low 10 bits */ \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Append a code point to a string, overwriting 1 or 2 code units.
|
||||
* The offset points to the current end of the string contents
|
||||
* and is advanced (post-increment).
|
||||
* "Safe" macro, checks for a valid code point.
|
||||
* If a surrogate pair is written, checks for sufficient space in the string.
|
||||
* If the code point is not valid or a trail surrogate does not fit,
|
||||
* then isError is set to TRUE.
|
||||
*
|
||||
* @param s UChar * string buffer (written to, so it must not be const)
|
||||
* @param i string offset, must be i<capacity
|
||||
* @param capacity size of the string buffer
|
||||
* @param c code point to append
|
||||
* @param isError output UBool set to TRUE if an error occurs, otherwise not modified
|
||||
* @see U16_APPEND_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); /* single code unit; the caller guarantees i<capacity */ \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
        /* surrogate pair: both units must fit, hence the (i)+1 check */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* c>0x10ffff or not enough space */ { \
        (isError)=TRUE; /* nothing is written and i is not advanced */ \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the next.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @see U16_FWD_1
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        ++(i); /* no check: the unit after a lead is taken to be its trail */ \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the next.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset, must be i<length
|
||||
* @param length string length
|
||||
* @see U16_FWD_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* skip a second unit only for a complete lead+trail pair inside the string */ \
    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the n-th next one,
|
||||
* i.e., move forward by n code points.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param n number of code points to skip
|
||||
* @see U16_FWD_N
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); /* local copy: n is evaluated once */ \
    while(__N>0) { \
        U16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the n-th next one,
|
||||
* i.e., move forward by n code points.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i int32_t string offset, must be i<length
|
||||
* @param length int32_t string length
|
||||
* @param n number of code points to skip
|
||||
* @see U16_FWD_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); /* local copy: n is evaluated once */ \
    /* stop early at the end: i reaches length, or a NUL unit when length<0 */ \
    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
        U16_FWD_1(s, i, length); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary
|
||||
* at the start of a code point.
|
||||
* If the offset points to the trail surrogate of a surrogate pair,
|
||||
* then the offset is decremented.
|
||||
* Otherwise, it is not modified.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @see U16_SET_CP_START
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); /* no check: the preceding unit is taken to be the lead */ \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary
|
||||
* at the start of a code point.
|
||||
* If the offset points to the trail surrogate of a surrogate pair,
|
||||
* then the offset is decremented.
|
||||
* Otherwise, it is not modified.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<=i
|
||||
* @see U16_SET_CP_START_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* step back only if the trail at i has a lead directly before it, and not before start */ \
    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a trail surrogate unit
|
||||
* for a supplementary code point, then the macro will read
|
||||
* the preceding lead surrogate as well.
|
||||
* If the offset is behind a lead surrogate, then that itself
|
||||
* will be returned as the code point.
|
||||
* The result is undefined if the offset is behind a single, unpaired trail surrogate.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_PREV
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; /* step back one unit and read it */ \
    if(U16_IS_TRAIL(c)) { \
        /* no check: the unit before a trail is taken to be its lead */ \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a trail surrogate unit
|
||||
* for a supplementary code point, then the macro will read
|
||||
* the preceding lead surrogate as well.
|
||||
* If the offset is behind a lead surrogate or behind a single, unpaired
|
||||
* trail surrogate, then c is set to that unpaired surrogate.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<i
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_PREV_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; /* step back one unit and read it */ \
    if(U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        /* combine only when a lead precedes the trail at or after start; */ \
        /* otherwise c keeps the unpaired trail surrogate */ \
        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a trail surrogate unit
|
||||
* for a supplementary code point, then the macro will read
|
||||
* the preceding lead surrogate as well.
|
||||
* If the offset is behind a lead surrogate or behind a single, unpaired
|
||||
* trail surrogate, then c is set to U+FFFD.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<i
|
||||
* @param c output UChar32 variable
|
||||
* @see U16_PREV_UNSAFE
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; /* step back one unit and read it */ \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        /* only a trail preceded by a lead at or after start forms a code point */ \
        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } else { \
            (c)=0xfffd; /* unpaired surrogate: substitute U+FFFD */ \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @see U16_BACK_1
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        --(i); /* no check: the unit before a trail is taken to be its lead */ \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start starting string offset (usually 0)
|
||||
* @param i string offset, must be start<i
|
||||
* @see U16_BACK_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* step back a second unit only for a complete lead+trail pair at or after start */ \
    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the n-th one before it,
|
||||
* i.e., move backward by n code points.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @param n number of code points to skip
|
||||
* @see U16_BACK_N
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); /* local copy: n is evaluated once */ \
    while(__N>0) { \
        U16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the n-th one before it,
|
||||
* i.e., move backward by n code points.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start start of string
|
||||
* @param i string offset, must be start<i
|
||||
* @param n number of code points to skip
|
||||
* @see U16_BACK_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); /* local copy: n is evaluated once */ \
    while(__N>0 && (i)>(start)) { /* stop early once i reaches start */ \
        U16_BACK_1(s, start, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary after a code point.
|
||||
* If the offset is behind the lead surrogate of a surrogate pair,
|
||||
* then the offset is incremented.
|
||||
* Otherwise, it is not modified.
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-16.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param i string offset
|
||||
* @see U16_SET_CP_LIMIT
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); /* no check: the unit at i is taken to be the matching trail */ \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary after a code point.
|
||||
* If the offset is behind the lead surrogate of a surrogate pair,
|
||||
* then the offset is incremented.
|
||||
* Otherwise, it is not modified.
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const UChar * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, start<=i<=length
|
||||
* @param length int32_t string length
|
||||
* @see U16_SET_CP_LIMIT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* advance only when i splits a pair: a lead at i-1 (not before start) and a trail at i, */ \
    /* where i is still inside the string (or length<0 for NUL-terminated input) */ \
    if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
#endif
|
@ -0,0 +1,881 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: utf8.h
|
||||
* encoding: UTF-8
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: 8-bit Unicode handling macros
|
||||
*
|
||||
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
|
||||
*
|
||||
* For more information see utf.h and the ICU User Guide Strings chapter
|
||||
* (https://unicode-org.github.io/icu/userguide/strings).
|
||||
*
|
||||
* <em>Usage:</em>
|
||||
* ICU coding guidelines for if() statements should be followed when using these macros.
|
||||
* Compound statements (curly braces {}) must be used for if-else-while...
|
||||
* bodies and all macro statements should be terminated with semicolon.
|
||||
*/
|
||||
|
||||
#ifndef __UTF8_H__
|
||||
#define __UTF8_H__
|
||||
|
||||
#include "unicode/umachine.h"
|
||||
#ifndef __UTF_H__
|
||||
# include "unicode/utf.h"
|
||||
#endif
|
||||
|
||||
/* internal definitions ----------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Counts the trail bytes for a UTF-8 lead byte.
|
||||
* Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
|
||||
* leadByte might be evaluated multiple times.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this file and thus must remain stable.
|
||||
*
|
||||
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
|
||||
* @internal
|
||||
*/
|
||||
/* Non-lead bytes give 0; a lead byte has one trail byte, plus one more each for >=0xe0 and >=0xf0. */
#define U8_COUNT_TRAIL_BYTES(leadByte) \
    (U8_IS_LEAD(leadByte) ? \
        ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)
|
||||
|
||||
/**
|
||||
* Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
|
||||
* Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
|
||||
* leadByte might be evaluated multiple times.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this file and thus must remain stable.
|
||||
*
|
||||
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
|
||||
* @internal
|
||||
*/
|
||||
/* Each comparison contributes 1, so the sum is 1 from 0xc2, 2 from 0xe0 and 3 from 0xf0 upward. */
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
    (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
|
||||
|
||||
/**
|
||||
* Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is called by public macros in this file and thus must remain stable.
|
||||
* @internal
|
||||
*/
|
||||
/* Keeps the low 6-countTrailBytes bits of leadByte (0x3f>>n equals (1<<(6-n))-1 for n in 0..6). */
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(0x3f>>(countTrailBytes)))
|
||||
|
||||
/**
|
||||
* Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
|
||||
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
|
||||
* Lead byte E0..EF bits 3..0 are used as byte index,
|
||||
* first trail byte bits 7..5 are used as bit index into that byte.
|
||||
* @see U8_IS_VALID_LEAD3_AND_T1
|
||||
* @internal
|
||||
*/
|
||||
/* Entry n belongs to lead byte 0xE0+n; the bit tested is number (t1>>5). */
/* 0x20 accepts only t1 0xA0..0xBF (entry 0), 0x10 only 0x80..0x9F (entry 13), 0x30 all of 0x80..0xBF. */
#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
|
||||
|
||||
/**
|
||||
* Internal 3-byte UTF-8 validity check.
|
||||
* Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
|
||||
* @internal
|
||||
*/
|
||||
/* Selects bit (t1>>5) of the table byte indexed by the lead byte's low nibble; the result is that bit or 0. */
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) ((1<<((uint8_t)(t1)>>5))&U8_LEAD3_T1_BITS[(lead)&0xf])
|
||||
|
||||
/**
|
||||
* Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
|
||||
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
|
||||
* First trail byte bits 7..4 are used as byte index,
|
||||
* lead byte F0..F4 bits 2..0 are used as bit index into that byte.
|
||||
* @see U8_IS_VALID_LEAD4_AND_T1
|
||||
* @internal
|
||||
*/
|
||||
/* Entry n belongs to first trail bytes with high nibble n; the bit tested is number (lead&7). */
/* 0x1E (trail 0x80..0x8F) accepts leads F1..F4, 0x0F (trail 0x90..0xBF) accepts F0..F3, 0x00 rejects all. */
#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
|
||||
|
||||
/**
|
||||
* Internal 4-byte UTF-8 validity check.
|
||||
* Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
|
||||
* @internal
|
||||
*/
|
||||
/* Selects bit (lead&7) of the table byte indexed by the trail byte's high nibble; the result is that bit or 0. */
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) ((1<<((lead)&7))&U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4])
|
||||
|
||||
/**
|
||||
* Function for handling "next code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_STABLE UChar32 U_EXPORT2
|
||||
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
|
||||
|
||||
/**
|
||||
* Function for handling "append code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_STABLE int32_t U_EXPORT2
|
||||
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
|
||||
|
||||
/**
|
||||
* Function for handling "previous code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_STABLE UChar32 U_EXPORT2
|
||||
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
|
||||
|
||||
/**
|
||||
* Function for handling "skip backward one code point" with error-checking.
|
||||
*
|
||||
* This is internal since it is not meant to be called directly by external clients;
|
||||
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
|
||||
* file and thus must remain stable, and should not be hidden when other internal
|
||||
* functions are hidden (otherwise public macros would fail to compile).
|
||||
* @internal
|
||||
*/
|
||||
U_STABLE int32_t U_EXPORT2
|
||||
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
|
||||
|
||||
/* single-code point definitions -------------------------------------------- */
|
||||
|
||||
/**
|
||||
* Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
|
||||
* @param c 8-bit code unit (byte)
|
||||
* @return TRUE or FALSE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* A byte stands alone exactly when its top bit (0x80) is clear. */
#define U8_IS_SINGLE(c) (!((c)&0x80))
|
||||
|
||||
/**
|
||||
* Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
|
||||
* @param c 8-bit code unit (byte)
|
||||
* @return TRUE or FALSE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* One unsigned comparison: bytes below 0xc2 wrap around to large values after the subtraction. */
#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=(0xf4-0xc2))
|
||||
// 0x32=0xf4-0xc2
|
||||
|
||||
/**
|
||||
* Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
|
||||
* @param c 8-bit code unit (byte)
|
||||
* @return TRUE or FALSE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* Trail bytes are exactly those whose top two bits are 10. */
#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
|
||||
|
||||
/**
|
||||
* How many code units (bytes) are used for the UTF-8 encoding
|
||||
* of this Unicode code point?
|
||||
* @param c 32-bit code point
|
||||
* @return 1..4, or 0 if c is a surrogate or not a Unicode code point
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_LENGTH(c) \
    ((uint32_t)(c)<=0x7f ? 1 : \
        ((uint32_t)(c)<=0x7ff ? 2 : \
            ((uint32_t)(c)<=0xd7ff ? 3 : \
                /* reached only above 0xd7ff: surrogates and values past U+10FFFF have no encoding */ \
                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
                ) \
            ) \
        ) \
    )
|
||||
|
||||
/**
|
||||
* The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
|
||||
* @return 4
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* Equals the largest value U8_LENGTH() yields, which is the one for code points above U+FFFF. */
#define U8_MAX_LENGTH 4
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a random-access offset,
|
||||
* without changing the offset.
|
||||
* The offset may point to either the lead byte or one of the trail bytes
|
||||
* for a code point, in which case the macro will read all of the bytes
|
||||
* for the code point.
|
||||
* The result is undefined if the offset points to an illegal UTF-8
|
||||
* byte sequence.
|
||||
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U8_GET
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_get_unsafe_index=(int32_t)(i); /* work on a copy so i is left unchanged */ \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a random-access offset,
|
||||
* without changing the offset.
|
||||
* The offset may point to either the lead byte or one of the trail bytes
|
||||
* for a code point, in which case the macro will read all of the bytes
|
||||
* for the code point.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* If the offset points to an illegal UTF-8 byte sequence, then
|
||||
* c is set to a negative value.
|
||||
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset
|
||||
* @param i int32_t string offset, must be start<=i<length
|
||||
* @param length int32_t string length
|
||||
* @param c output UChar32 variable, set to <0 in case of an error
|
||||
* @see U8_GET_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_get_index=(i); /* work on a copy so i is left unchanged */ \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a random-access offset,
|
||||
* without changing the offset.
|
||||
* The offset may point to either the lead byte or one of the trail bytes
|
||||
* for a code point, in which case the macro will read all of the bytes
|
||||
* for the code point.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* If the offset points to an illegal UTF-8 byte sequence, then
|
||||
* c is set to U+FFFD.
|
||||
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
|
||||
*
|
||||
* This macro does not distinguish between a real U+FFFD in the text
|
||||
* and U+FFFD returned for an ill-formed sequence.
|
||||
* Use U8_GET() if that distinction is important.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset
|
||||
* @param i int32_t string offset, must be start<=i<length
|
||||
* @param length int32_t string length
|
||||
* @param c output UChar32 variable, set to U+FFFD in case of an error
|
||||
* @see U8_GET
|
||||
* @stable ICU 51
|
||||
*/
|
||||
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_get_index=(i); /* work on a copy so i is left unchanged */ \
    U8_SET_CP_START(s, start, _u8_get_index); \
    U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with forward iteration --------------------------------------- */
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* The offset may point to the lead byte of a multi-byte sequence,
|
||||
* in which case the macro will read the whole sequence.
|
||||
* The result is undefined if the offset points to a trail byte
|
||||
* or an illegal UTF-8 sequence.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U8_NEXT
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        if((c)<0xe0) { \
            /* two-byte sequence: 5 bits from the lead, 6 from the trail */ \
            (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
        } else if((c)<0xf0) { \
            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
            (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
            (i)+=2; \
        } else { \
            /* four-byte sequence: 3 bits from the lead, 6 from each of three trails */ \
            (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
            (i)+=3; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* The offset may point to the lead byte of a multi-byte sequence,
|
||||
* in which case the macro will read the whole sequence.
|
||||
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
|
||||
* c is set to a negative value.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i int32_t string offset, must be i<length
|
||||
* @param length int32_t string length
|
||||
* @param c output UChar32 variable, set to <0 in case of an error
|
||||
* @see U8_NEXT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
/* Delegates to U8_INTERNAL_NEXT_OR_SUB, passing U_SENTINEL as the value stored in c for ill-formed input. */
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
|
||||
|
||||
/**
|
||||
* Get a code point from a string at a code point boundary offset,
|
||||
* and advance the offset to the next code point boundary.
|
||||
* (Post-incrementing forward iteration.)
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* The offset may point to the lead byte of a multi-byte sequence,
|
||||
* in which case the macro will read the whole sequence.
|
||||
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
|
||||
* c is set to U+FFFD.
|
||||
*
|
||||
* This macro does not distinguish between a real U+FFFD in the text
|
||||
* and U+FFFD returned for an ill-formed sequence.
|
||||
* Use U8_NEXT() if that distinction is important.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i int32_t string offset, must be i<length
|
||||
* @param length int32_t string length
|
||||
* @param c output UChar32 variable, set to U+FFFD in case of an error
|
||||
* @see U8_NEXT
|
||||
* @stable ICU 51
|
||||
*/
|
||||
/* Delegates to U8_INTERNAL_NEXT_OR_SUB, passing 0xfffd as the value stored in c for ill-formed input. */
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
|
||||
|
||||
/** @internal */
|
||||
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        uint8_t __t = 0; \
        /* One short-circuit chain validates and assembles the sequence; i advances past */ \
        /* each accepted byte, so on failure it stops at the first byte that did not fit. */ \
        if((i)!=(length) && \
            /* fetch/validate/assemble all but last trail byte */ \
            ((c)>=0xe0 ? \
                ((c)<0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
                    U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
                    (__t&=0x3f, 1) \
                :  /* U+10000..U+10FFFF */ \
                    ((c)-=0xf0)<=4 && \
                    U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
                    ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
                    (__t=(s)[i]-0x80)<=0x3f) && \
                /* valid second-to-last trail byte */ \
                ((c)=((c)<<6)|__t, ++(i)!=(length)) \
            :  /* U+0080..U+07FF */ \
                (c)>=0xc2 && ((c)&=0x1f, 1)) && \
            /* last trail byte */ \
            (__t=(s)[i]-0x80)<=0x3f && \
            ((c)=((c)<<6)|__t, ++(i), 1)) { \
        } else { \
            (c)=(sub);  /* ill-formed*/ \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Append a code point to a string, overwriting 1 to 4 bytes.
|
||||
* The offset points to the current end of the string contents
|
||||
* and is advanced (post-increment).
|
||||
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
|
||||
* Otherwise, the result is undefined.
|
||||
*
|
||||
* @param s uint8_t * string buffer (written to, so it must not be const)
|
||||
* @param i string offset
|
||||
* @param c code point to append
|
||||
* @see U8_APPEND
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); /* c is evaluated exactly once */ \
    if(__uc<=0x7f) { \
        /* one byte */ \
        (s)[(i)++]=(uint8_t)__uc; \
    } else if(__uc<=0x7ff) { \
        /* two bytes: lead, then one trail byte */ \
        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if(__uc<=0xffff) { \
        /* three bytes: lead, then two trail bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else { \
        /* four bytes: lead, then three trail bytes; no range check ("unsafe") */ \
        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Append a code point to a string, overwriting 1 to 4 bytes.
|
||||
* The offset points to the current end of the string contents
|
||||
* and is advanced (post-increment).
|
||||
* "Safe" macro, checks for a valid code point.
|
||||
* If a non-ASCII code point is written, checks for sufficient space in the string.
|
||||
* If the code point is not valid or trail bytes do not fit,
|
||||
* then isError is set to TRUE.
|
||||
*
|
||||
* @param s uint8_t * string buffer (written to)
|
||||
* @param i int32_t string offset, must be i<capacity
|
||||
* @param capacity int32_t size of the string buffer
|
||||
* @param c UChar32 code point to append
|
||||
* @param isError output UBool set to TRUE if an error occurs, otherwise not modified
|
||||
* @see U8_APPEND_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t __uc=(c); /* c is evaluated exactly once */ \
    if(__uc<0x80) { \
        /* one byte; written without a capacity check */ \
        (s)[(i)++]=(uint8_t)__uc; \
    } else if(__uc<0x800 && (i)+1<(capacity)) { \
        /* two bytes */ \
        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if(__uc<0x10000 && (__uc<0xd800 || 0xdfff<__uc) && (i)+2<(capacity)) { \
        /* three bytes; the surrogate range U+D800..U+DFFF is rejected */ \
        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else if(0x10000<=__uc && __uc<0x110000 && (i)+3<(capacity)) { \
        /* four bytes: U+10000..U+10FFFF */ \
        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
    } else { \
        /* invalid code point, or the trail bytes do not fit: nothing is written */ \
        (isError)=TRUE; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the next.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @see U8_FWD_1
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Step over the byte at s[i] plus the number of trail bytes that */ \
/* U8_COUNT_TRAIL_BYTES_UNSAFE reports for it; nothing is validated or bounds-checked here. */ \
(i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the next.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i int32_t string offset, must be i<length
|
||||
* @param length int32_t string length
|
||||
* @see U8_FWD_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    uint8_t __b=(s)[(i)++]; /* the first byte is always consumed */ \
    if(U8_IS_LEAD(__b) && (i)!=(length)) { \
        uint8_t __t1=(s)[i]; \
        /* i only moves over trail bytes that pass validation. */ \
        if(__b<0xe0) { \
            /* two-byte sequence */ \
            if(U8_IS_TRAIL(__t1)) { \
                ++(i); \
            } \
        } else if(__b<0xf0) { \
            /* three-byte sequence */ \
            if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } else /* __b>=0xf0 */ { \
            /* four-byte sequence */ \
            if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the n-th next one,
|
||||
* i.e., move forward by n code points.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @param n number of code points to skip
|
||||
* @see U8_FWD_N
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); /* n is evaluated exactly once */ \
    for(; __N>0; --__N) { \
        U8_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Advance the string offset from one code point boundary to the n-th next one,
|
||||
* i.e., move forward by n code points.
|
||||
* (Post-incrementing iteration.)
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i int32_t string offset, must be i<length
|
||||
* @param length int32_t string length
|
||||
* @param n number of code points to skip
|
||||
* @see U8_FWD_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); /* n is evaluated exactly once */ \
    /* Stop early at the end of the string: i reaching length, or a NUL byte when length is negative. */ \
    for(; __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
        U8_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary
|
||||
* at the start of a code point.
|
||||
* If the offset points to a UTF-8 trail byte,
|
||||
* then the offset is moved backward to the corresponding lead byte.
|
||||
* Otherwise, it is not modified.
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @see U8_SET_CP_START
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Walk backward for as long as s[i] is a trail byte; there is no lower bound check. */ \
    for(; U8_IS_TRAIL((s)[i]); --(i)) {} \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary
|
||||
* at the start of a code point.
|
||||
* If the offset points to a UTF-8 trail byte,
|
||||
* then the offset is moved backward to the corresponding lead byte.
|
||||
* Otherwise, it is not modified.
|
||||
*
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
* Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<=i
|
||||
* @see U8_SET_CP_START_UNSAFE
|
||||
* @see U8_TRUNCATE_IF_INCOMPLETE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* s[i] is always read; any byte that is not a trail byte leaves i unchanged. */ \
if(U8_IS_TRAIL((s)[(i)])) { \
|
||||
/* The helper (defined elsewhere) returns the adjusted offset; presumably it never goes below start. */ \
(i)=utf8_back1SafeBody(s, start, (i)); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* If the string ends with a UTF-8 byte sequence that is valid so far
|
||||
* but incomplete, then reduce the length of the string to end before
|
||||
* the lead byte of that incomplete sequence.
|
||||
* For example, if the string ends with E1 80, the length is reduced by 2.
|
||||
*
|
||||
* In all other cases (the string ends with a complete sequence, or it is not
|
||||
* possible for any further trail byte to extend the trailing sequence)
|
||||
* the length remains unchanged.
|
||||
*
|
||||
* Useful for processing text split across multiple buffers
|
||||
* (save the incomplete sequence for later)
|
||||
* and for optimizing iteration
|
||||
* (check for string length only once per character).
|
||||
*
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
* Unlike U8_SET_CP_START(), this macro never reads s[length].
|
||||
*
|
||||
* (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param length int32_t string length (usually start<=length)
|
||||
* @see U8_SET_CP_START
|
||||
* @stable ICU 61
|
||||
*/
|
||||
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* Only the last one to three bytes are inspected, and only bytes at offsets >= start. */ \
    /* s is parenthesized at every use so that an expression such as buf+1 indexes correctly. */ \
    if((length)>(start)) { \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* a lone lead byte at the end is always incomplete */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* 3- or 4-byte lead followed by one trail byte that is valid for it */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                uint8_t __b3=(s)[(length)-3]; \
                /* 4-byte lead followed by two trail bytes; only the first trail byte is range-checked */ \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/* definitions with backward iteration -------------------------------------- */
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a multi-byte sequence, then the macro will read
|
||||
* the whole sequence.
|
||||
* If the offset is behind a lead byte, then that itself
|
||||
* will be returned as the code point.
|
||||
* The result is undefined if the offset is behind an illegal UTF-8 sequence.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @param c output UChar32 variable
|
||||
* @see U8_PREV
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
        \
        /* c is the last trail byte: keep its six payload bits */ \
        (c)&=0x3f; \
        /* Keep stepping backward over bytes below 0xc0, stacking their payload bits above c. */ \
        while((__b=(s)[--(i)])<0xc0) { \
            (c)|=(UChar32)(__b&0x3f)<<__shift; \
            ++__count; \
            __shift+=6; \
        } \
        /* __b is now the lead byte; __count is the number of trail bytes seen. */ \
        U8_MASK_LEAD_BYTE(__b, __count); \
        (c)|=(UChar32)__b<<__shift; \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a multi-byte sequence, then the macro will read
|
||||
* the whole sequence.
|
||||
* If the offset is behind a lead byte, then that itself
|
||||
* will be returned as the code point.
|
||||
* If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<i
|
||||
* @param c output UChar32 variable, set to <0 in case of an error
|
||||
* @see U8_PREV_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Pre-decrement i and fetch the byte in front of the old offset. */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* Multi-byte case: the helper may move i further back and returns the result for c. */ \
        /* s is parenthesized so that the cast applies to the whole argument expression. */ \
        /* The final argument (-1) presumably selects the negative-on-error result documented for this macro. */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one
|
||||
* and get the code point between them.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The input offset may be the same as the string length.
|
||||
* If the offset is behind a multi-byte sequence, then the macro will read
|
||||
* the whole sequence.
|
||||
* If the offset is behind a lead byte, then that itself
|
||||
* will be returned as the code point.
|
||||
* If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
|
||||
*
|
||||
* This macro does not distinguish between a real U+FFFD in the text
|
||||
* and U+FFFD returned for an ill-formed sequence.
|
||||
* Use U8_PREV() if that distinction is important.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<i
|
||||
* @param c output UChar32 variable, set to U+FFFD in case of an error
|
||||
* @see U8_PREV
|
||||
* @stable ICU 51
|
||||
*/
|
||||
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Pre-decrement i and fetch the byte in front of the old offset. */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* Multi-byte case: the helper may move i further back and returns the result for c. */ \
        /* s is parenthesized so that the cast applies to the whole argument expression. */ \
        /* The final argument (-3) presumably selects the U+FFFD-on-error result documented for this macro. */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @see U8_BACK_1
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Always step back at least one byte, then continue while the byte under i is a trail byte. */ \
    do { \
        --(i); \
    } while(U8_IS_TRAIL((s)[i])); \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the previous one.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<i
|
||||
* @see U8_BACK_1_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* i is pre-decremented unconditionally; if it now rests on a non-trail byte, that is the result. */ \
if(U8_IS_TRAIL((s)[--(i)])) { \
|
||||
/* Landed on a trail byte: the helper (defined elsewhere) chooses the final offset. */ \
(i)=utf8_back1SafeBody(s, start, (i)); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the n-th one before it,
|
||||
* i.e., move backward by n code points.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @param n number of code points to skip
|
||||
* @see U8_BACK_N
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); /* n is evaluated exactly once */ \
    for(; __N>0; --__N) { \
        U8_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Move the string offset from one code point boundary to the n-th one before it,
|
||||
* i.e., move backward by n code points.
|
||||
* (Pre-decrementing backward iteration.)
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t index of the start of the string
|
||||
* @param i int32_t string offset, must be start<i
|
||||
* @param n number of code points to skip
|
||||
* @see U8_BACK_N_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); /* n is evaluated exactly once */ \
    /* Stop early once i has reached start. */ \
    for(; __N>0 && (i)>(start); --__N) { \
        U8_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary after a code point.
|
||||
* If the offset is behind a partial multi-byte sequence,
|
||||
* then the offset is incremented to behind the whole sequence.
|
||||
* Otherwise, it is not modified.
|
||||
* The input offset may be the same as the string length.
|
||||
* "Unsafe" macro, assumes well-formed UTF-8.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param i string offset
|
||||
* @see U8_SET_CP_LIMIT
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Go back to the lead byte of the sequence in front of i ... */ \
U8_BACK_1_UNSAFE(s, i); \
|
||||
/* ... then forward over that whole sequence, which ends at or after the original i. */ \
U8_FWD_1_UNSAFE(s, i); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
/**
|
||||
* Adjust a random-access offset to a code point boundary after a code point.
|
||||
* If the offset is behind a partial multi-byte sequence,
|
||||
* then the offset is incremented to behind the whole sequence.
|
||||
* Otherwise, it is not modified.
|
||||
* The input offset may be the same as the string length.
|
||||
* "Safe" macro, checks for illegal sequences and for string boundaries.
|
||||
*
|
||||
* The length can be negative for a NUL-terminated string.
|
||||
*
|
||||
* @param s const uint8_t * string
|
||||
* @param start int32_t starting string offset (usually 0)
|
||||
* @param i int32_t string offset, must be start<=i<=length
|
||||
* @param length int32_t string length
|
||||
* @see U8_SET_CP_LIMIT_UNSAFE
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Nothing to do when i is at start, or at/after a non-negative length; a negative length skips the upper check. */ \
if((start)<(i) && ((i)<(length) || (length)<0)) { \
|
||||
/* Back to the start of the preceding sequence, then forward over it with full validation. */ \
U8_BACK_1(s, start, i); \
|
||||
U8_FWD_1(s, i, length); \
|
||||
} \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
|
||||
#endif
|
@ -0,0 +1,24 @@
|
||||
"calloc",
|
||||
"free",
|
||||
"iswalnum",
|
||||
"iswalpha",
|
||||
"iswblank",
|
||||
"iswdigit",
|
||||
"iswlower",
|
||||
"iswspace",
|
||||
"iswupper",
|
||||
"iswxdigit",
|
||||
"malloc",
|
||||
"memchr",
|
||||
"memcmp",
|
||||
"memcpy",
|
||||
"memmove",
|
||||
"memset",
|
||||
"realloc",
|
||||
"strcmp",
|
||||
"strlen",
|
||||
"strncat",
|
||||
"strncmp",
|
||||
"strncpy",
|
||||
"towlower",
|
||||
"towupper",
|
@ -0,0 +1,109 @@
|
||||
// This file implements a very simple allocator for external scanners running
|
||||
// in WASM. Allocation is just bumping a static pointer and growing the heap
|
||||
// as needed, and freeing is mostly a noop. But in the special case of freeing
|
||||
// the last-allocated pointer, we'll reuse that pointer again.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
extern void tree_sitter_debug_message(const char *, size_t);
|
||||
|
||||
// Bytes per memory page (0x10000 = 64 KiB); get_heap_end() and grow_heap()
// use it to convert between page counts and byte addresses.
#define PAGESIZE 0x10000
|
||||
// Largest distance in bytes (4 MiB) that malloc() allows between heap_start
// and the end of a new region when the heap has to grow.
#define MAX_HEAP_SIZE (4 * 1024 * 1024)
|
||||
|
||||
// Header stored directly in front of every allocation's payload.
typedef struct {
  // Payload size in bytes, exactly as it was requested.
  size_t size;
  // The payload itself. A C99 flexible array member replaces the
  // non-standard zero-length array; the struct layout is unchanged.
  char data[];
} Region;
|
||||
|
||||
// End of the memory currently available, as last returned by get_heap_end().
static Region *heap_end = NULL;
|
||||
// Start of the heap as set by reset_heap(); malloc() measures MAX_HEAP_SIZE from here.
static Region *heap_start = NULL;
|
||||
// Bump pointer: where the header of the next allocated region will be written.
static Region *next = NULL;
|
||||
|
||||
// Get the region metadata for the given heap pointer.
|
||||
static inline Region *region_for_ptr(void *ptr) {
|
||||
return ((Region *)ptr) - 1;
|
||||
}
|
||||
|
||||
// Get the location of the next region after the given region,
|
||||
// if the given region had the given size.
|
||||
static inline Region *region_after(Region *self, size_t len) {
|
||||
char *address = self->data + len;
|
||||
char *aligned = (char *)((uintptr_t)(address + 3) & ~0x3);
|
||||
return (Region *)aligned;
|
||||
}
|
||||
|
||||
static void *get_heap_end() {
|
||||
return (void *)(__builtin_wasm_memory_size(0) * PAGESIZE);
|
||||
}
|
||||
|
||||
static int grow_heap(size_t size) {
|
||||
size_t new_page_count = ((size - 1) / PAGESIZE) + 1;
|
||||
return __builtin_wasm_memory_grow(0, new_page_count) != SIZE_MAX;
|
||||
}
|
||||
|
||||
// Clear out the heap, and move it to the given address.
|
||||
void reset_heap(void *new_heap_start) {
|
||||
heap_start = new_heap_start;
|
||||
next = new_heap_start;
|
||||
heap_end = get_heap_end();
|
||||
}
|
||||
|
||||
void *malloc(size_t size) {
|
||||
Region *region_end = region_after(next, size);
|
||||
|
||||
if (region_end > heap_end) {
|
||||
if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) {
|
||||
return NULL;
|
||||
}
|
||||
if (!grow_heap(size)) return NULL;
|
||||
heap_end = get_heap_end();
|
||||
}
|
||||
|
||||
void *result = &next->data;
|
||||
next->size = size;
|
||||
next = region_end;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void free(void *ptr) {
|
||||
if (ptr == NULL) return;
|
||||
|
||||
Region *region = region_for_ptr(ptr);
|
||||
Region *region_end = region_after(region, region->size);
|
||||
|
||||
// When freeing the last allocated pointer, re-use that
|
||||
// pointer for the next allocation.
|
||||
if (region_end == next) {
|
||||
next = region;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate zero-filled storage for `count` elements of `size` bytes each.
// Returns NULL if `count * size` overflows or the allocation fails.
void *calloc(size_t count, size_t size) {
  // Reject products that do not fit in size_t instead of silently
  // allocating a truncated block.
  if (size != 0 && count > (size_t)-1 / size) return NULL;

  size_t total = count * size;
  void *result = malloc(total);
  // malloc() can fail; never clear through a NULL pointer.
  if (result != NULL) {
    memset(result, 0, total);
  }
  return result;
}
|
||||
|
||||
void *realloc(void *ptr, size_t new_size) {
|
||||
if (ptr == NULL) {
|
||||
return malloc(new_size);
|
||||
}
|
||||
|
||||
Region *region = region_for_ptr(ptr);
|
||||
Region *region_end = region_after(region, region->size);
|
||||
|
||||
// When reallocating the last allocated region, return
|
||||
// the same pointer, and skip copying the data.
|
||||
if (region_end == next) {
|
||||
next = region;
|
||||
return malloc(new_size);
|
||||
}
|
||||
|
||||
void *result = malloc(new_size);
|
||||
memcpy(result, ®ion->data, region->size);
|
||||
return result;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,31 @@
|
||||
#ifndef TREE_SITTER_WASM_H_
|
||||
#define TREE_SITTER_WASM_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "tree_sitter/api.h"
|
||||
#include "./parser.h"
|
||||
|
||||
bool ts_wasm_store_start(TSWasmStore *, TSLexer *, const TSLanguage *);
|
||||
void ts_wasm_store_reset(TSWasmStore *);
|
||||
bool ts_wasm_store_has_error(const TSWasmStore *);
|
||||
|
||||
bool ts_wasm_store_call_lex_main(TSWasmStore *, TSStateId);
|
||||
bool ts_wasm_store_call_lex_keyword(TSWasmStore *, TSStateId);
|
||||
|
||||
uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *);
|
||||
void ts_wasm_store_call_scanner_destroy(TSWasmStore *, uint32_t);
|
||||
bool ts_wasm_store_call_scanner_scan(TSWasmStore *, uint32_t, uint32_t);
|
||||
uint32_t ts_wasm_store_call_scanner_serialize(TSWasmStore *, uint32_t, char *);
|
||||
void ts_wasm_store_call_scanner_deserialize(TSWasmStore *, uint32_t, const char *, unsigned);
|
||||
|
||||
void ts_wasm_language_retain(const TSLanguage *);
|
||||
void ts_wasm_language_release(const TSLanguage *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_WASM_H_
|
@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash
# Replace the vendored tree-sitter sources in ./tree-sitter with lib/src,
# lib/include and LICENSE from the pinned upstream release.

# Stop at the first failing command, unset variable or failing pipeline stage.
set -euo pipefail

REMOTE=https://github.com/tree-sitter/tree-sitter.git
BRANCH=v0.22.5

# Clone into a scratch directory first, so that a failed download leaves the
# existing vendored tree untouched.
rm -rf tmp
git clone --depth 1 --branch "$BRANCH" "$REMOTE" tmp

rm -rf tree-sitter
mkdir tree-sitter
mv tmp/lib/src tree-sitter
mv tmp/lib/include tree-sitter
mv tmp/LICENSE tree-sitter
rm -rf tmp
|
Loading…
Reference in New Issue