#define _GNU_SOURCE #include "protocol.h" #include "helpers.h" #include <string.h> #include <ctype.h> #include <arpa/inet.h> #include <string.h> // find N level domain static bool FindNLD(const uint8_t *dom, size_t dlen, int level, const uint8_t **p, size_t *len) { int i; const uint8_t *p1,*p2; for (i=1,p2=dom+dlen;i<level;i++) { for (p2--; p2>dom && *p2!='.'; p2--); if (p2<=dom) return false; } for (p1=p2-1 ; p1>dom && *p1!='.'; p1--); if (*p1=='.') p1++; if (p) *p = p1; if (len) *len = p2-p1; return true; } const char *l7proto_str(t_l7proto l7) { switch(l7) { case HTTP: return "http"; case TLS: return "tls"; case QUIC: return "quic"; case WIREGUARD: return "wireguard"; case DHT: return "dht"; default: return "unknown"; } } bool l7_proto_match(t_l7proto l7proto, uint32_t filter_l7) { return (l7proto==UNKNOWN && (filter_l7 & L7_PROTO_UNKNOWN)) || (l7proto==HTTP && (filter_l7 & L7_PROTO_HTTP)) || (l7proto==TLS && (filter_l7 & L7_PROTO_TLS)) || (l7proto==QUIC && (filter_l7 & L7_PROTO_QUIC)) || (l7proto==WIREGUARD && (filter_l7 & L7_PROTO_WIREGUARD)) || (l7proto==DHT && (filter_l7 & L7_PROTO_DHT)); } #define PM_ABS 0 #define PM_HOST 1 #define PM_HOST_END 2 #define PM_HOST_SLD 3 #define PM_HOST_MIDSLD 4 #define PM_HOST_ENDSLD 5 #define PM_HTTP_METHOD 6 #define PM_SNI_EXT 7 bool IsHostMarker(uint8_t posmarker) { switch(posmarker) { case PM_HOST: case PM_HOST_END: case PM_HOST_SLD: case PM_HOST_MIDSLD: case PM_HOST_ENDSLD: return true; default: return false; } } const char *posmarker_name(uint8_t posmarker) { switch(posmarker) { case PM_ABS: return "abs"; case PM_HOST: return "host"; case PM_HOST_END: return "endhost"; case PM_HOST_SLD: return "sld"; case PM_HOST_MIDSLD: return "midsld"; case PM_HOST_ENDSLD: return "endsld"; case PM_HTTP_METHOD: return "method"; case PM_SNI_EXT: return "sniext"; default: return "?"; } } static size_t CheckPos(size_t sz, ssize_t offset) { return (offset>=0 && offset<sz) ? offset : 0; } size_t AnyProtoPos(uint8_t posmarker, int16_t pos, const uint8_t *data, size_t sz) { ssize_t offset; switch(posmarker) { case PM_ABS: offset = (pos<0) ? sz+pos : pos; return CheckPos(sz,offset); default: return 0; } } static size_t HostPos(uint8_t posmarker, int16_t pos, const uint8_t *data, size_t sz, size_t offset_host, size_t len_host) { ssize_t offset; const uint8_t *p; size_t slen; switch(posmarker) { case PM_HOST: offset = offset_host+pos; break; case PM_HOST_END: offset = offset_host+len_host+pos; break; case PM_HOST_SLD: case PM_HOST_MIDSLD: case PM_HOST_ENDSLD: if (((offset_host+len_host)<=sz) && FindNLD(data+offset_host,len_host,2,&p,&slen)) offset = (posmarker==PM_HOST_SLD ? p-data : posmarker==PM_HOST_ENDSLD ? p-data+slen : slen==1 ? p+1-data : p+slen/2-data) + pos; else offset = 0; break; } return CheckPos(sz,offset); } size_t ResolvePos(const uint8_t *data, size_t sz, t_l7proto l7proto, const struct proto_pos *sp) { switch(l7proto) { case HTTP: return HttpPos(sp->marker, sp->pos, data, sz); case TLS: return TLSPos(sp->marker, sp->pos, data, sz); default: return AnyProtoPos(sp->marker, sp->pos, data, sz); } } void ResolveMultiPos(const uint8_t *data, size_t sz, t_l7proto l7proto, const struct proto_pos *splits, int split_count, size_t *pos, int *pos_count) { int i,j; for(i=j=0;i<split_count;i++) { pos[j] = ResolvePos(data,sz,l7proto,splits+i); if (pos[j]) j++; } qsort_size_t(pos, j); j=unique_size_t(pos, j); *pos_count=j; } const char *http_methods[] = { "GET /","POST /","HEAD /","OPTIONS /","PUT /","DELETE /","CONNECT /","TRACE /",NULL }; const char *HttpMethod(const uint8_t *data, size_t len) { const char **method; size_t method_len; for (method = http_methods; *method; method++) { method_len = strlen(*method); if (method_len <= len && !memcmp(data, *method, method_len)) return *method; } return NULL; } bool IsHttp(const uint8_t *data, size_t len) { return !!HttpMethod(data,len); } static bool IsHostAt(const uint8_t *p) { return \ p[0]=='\n' && (p[1]=='H' || p[1]=='h') && (p[2]=='o' || p[2]=='O') && (p[3]=='s' || p[3]=='S') && (p[4]=='t' || p[4]=='T') && p[5]==':'; } static uint8_t *FindHostIn(uint8_t *buf, size_t bs) { size_t pos; if (bs<6) return NULL; bs-=6; for(pos=0;pos<=bs;pos++) if (IsHostAt(buf+pos)) return buf+pos; return NULL; } static const uint8_t *FindHostInConst(const uint8_t *buf, size_t bs) { size_t pos; if (bs<6) return NULL; bs-=6; for(pos=0;pos<=bs;pos++) if (IsHostAt(buf+pos)) return buf+pos; return NULL; } // pHost points to "Host: ..." bool HttpFindHost(uint8_t **pHost,uint8_t *buf,size_t bs) { if (!*pHost) { *pHost = FindHostIn(buf, bs); if (*pHost) (*pHost)++; } return !!*pHost; } bool HttpFindHostConst(const uint8_t **pHost,const uint8_t *buf,size_t bs) { if (!*pHost) { *pHost = FindHostInConst(buf, bs); if (*pHost) (*pHost)++; } return !!*pHost; } bool IsHttpReply(const uint8_t *data, size_t len) { // HTTP/1.x 200\r\n return len>14 && !memcmp(data,"HTTP/1.",7) && (data[7]=='0' || data[7]=='1') && data[8]==' ' && data[9]>='0' && data[9]<='9' && data[10]>='0' && data[10]<='9' && data[11]>='0' && data[11]<='9'; } int HttpReplyCode(const uint8_t *data, size_t len) { return (data[9]-'0')*100 + (data[10]-'0')*10 + (data[11]-'0'); } bool HttpExtractHeader(const uint8_t *data, size_t len, const char *header, char *buf, size_t len_buf) { const uint8_t *p, *s, *e = data + len; p = (uint8_t*)strncasestr((char*)data, header, len); if (!p) return false; p += strlen(header); while (p < e && (*p == ' ' || *p == '\t')) p++; s = p; while (s < e && (*s != '\r' && *s != '\n' && *s != ' ' && *s != '\t')) s++; if (s > p) { size_t slen = s - p; if (buf && len_buf) { if (slen >= len_buf) slen = len_buf - 1; for (size_t i = 0; i < slen; i++) buf[i] = tolower(p[i]); buf[slen] = 0; } return true; } return false; } bool HttpExtractHost(const uint8_t *data, size_t len, char *host, size_t len_host) { return HttpExtractHeader(data, len, "\nHost:", host, len_host); } // DPI redirects are global redirects to another domain bool HttpReplyLooksLikeDPIRedirect(const uint8_t *data, size_t len, const char *host) { char loc[256],*redirect_host, *p; int code; if (!host || !*host) return false; code = HttpReplyCode(data,len); if ((code!=302 && code!=307) || !HttpExtractHeader(data,len,"\nLocation:",loc,sizeof(loc))) return false; // something like : https://censor.net/badpage.php?reason=denied&source=RKN if (!strncmp(loc,"http://",7)) redirect_host=loc+7; else if (!strncmp(loc,"https://",8)) redirect_host=loc+8; else return false; // somethinkg like : censor.net/badpage.php?reason=denied&source=RKN for(p=redirect_host; *p && *p!='/' ; p++); *p=0; if (!*redirect_host) return false; // somethinkg like : censor.net // extract 2nd level domains const char *dhost, *drhost; if (!FindNLD((uint8_t*)host,strlen(host),2,(const uint8_t**)&dhost,NULL) || !FindNLD((uint8_t*)redirect_host,strlen(redirect_host),2,(const uint8_t**)&drhost,NULL)) return false; // compare 2nd level domains return strcasecmp(dhost, drhost)!=0; } size_t HttpPos(uint8_t posmarker, int16_t pos, const uint8_t *data, size_t sz) { const uint8_t *method, *host=NULL, *p; size_t offset_host,len_host; ssize_t offset; int i; switch(posmarker) { case PM_HTTP_METHOD: // recognize some tpws pre-applied hacks method=data; if (sz<10) break; if (*method=='\n' || *method=='\r') method++; if (*method=='\n' || *method=='\r') method++; for (p=method,i=0;i<7;i++) if (*p>='A' && *p<='Z') p++; if (i<3 || *p!=' ') break; return CheckPos(sz,method-data+pos); case PM_HOST: case PM_HOST_END: case PM_HOST_SLD: case PM_HOST_MIDSLD: case PM_HOST_ENDSLD: if (HttpFindHostConst(&host,data,sz) && (host-data+7)<sz) { host+=5; if (*host==' ' || *host=='\t') host++; offset_host = host-data; if (posmarker!=PM_HOST) for (len_host=0; (offset_host+len_host)<sz && data[offset_host+len_host]!='\r' && data[offset_host+len_host]!='\n'; len_host++); else len_host = 0; return HostPos(posmarker,pos,data,sz,offset_host,len_host); } break; default: return AnyProtoPos(posmarker,pos,data,sz); } return 0; } uint16_t TLSRecordDataLen(const uint8_t *data) { return pntoh16(data + 3); } size_t TLSRecordLen(const uint8_t *data) { return TLSRecordDataLen(data) + 5; } bool IsTLSRecordFull(const uint8_t *data, size_t len) { return TLSRecordLen(data)<=len; } bool IsTLSClientHello(const uint8_t *data, size_t len, bool bPartialIsOK) { return len >= 6 && data[0] == 0x16 && data[1] == 0x03 && data[2] <= 0x03 && data[5] == 0x01 && (bPartialIsOK || TLSRecordLen(data) <= len); } // bPartialIsOK=true - accept partial packets not containing the whole TLS message bool TLSFindExtInHandshake(const uint8_t *data, size_t len, uint16_t type, const uint8_t **ext, size_t *len_ext, bool bPartialIsOK) { // +0 // u8 HandshakeType: ClientHello // u24 Length // u16 Version // c[32] random // u8 SessionIDLength // <SessionID> // u16 CipherSuitesLength // <CipherSuites> // u8 CompressionMethodsLength // <CompressionMethods> // u16 ExtensionsLength size_t l, ll; l = 1 + 3 + 2 + 32; // SessionIDLength if (len < (l + 1)) return false; if (!bPartialIsOK) { ll = data[1] << 16 | data[2] << 8 | data[3]; // HandshakeProtocol length if (len < (ll + 4)) return false; } l += data[l] + 1; // CipherSuitesLength if (len < (l + 2)) return false; l += pntoh16(data + l) + 2; // CompressionMethodsLength if (len < (l + 1)) return false; l += data[l] + 1; // ExtensionsLength if (len < (l + 2)) return false; data += l; len -= l; l = pntoh16(data); data += 2; len -= 2; if (bPartialIsOK) { if (len < l) l = len; } else { if (len < l) return false; } while (l >= 4) { uint16_t etype = pntoh16(data); size_t elen = pntoh16(data + 2); data += 4; l -= 4; if (l < elen) break; if (etype == type) { if (ext && len_ext) { *ext = data; *len_ext = elen; } return true; } data += elen; l -= elen; } return false; } bool TLSFindExt(const uint8_t *data, size_t len, uint16_t type, const uint8_t **ext, size_t *len_ext, bool bPartialIsOK) { // +0 // u8 ContentType: Handshake // u16 Version: TLS1.0 // u16 Length size_t reclen; if (!IsTLSClientHello(data, len, bPartialIsOK)) return false; reclen=TLSRecordLen(data); if (reclen<len) len=reclen; // correct len if it has more data than the first tls record has return TLSFindExtInHandshake(data + 5, len - 5, type, ext, len_ext, bPartialIsOK); } static bool TLSAdvanceToHostInSNI(const uint8_t **ext, size_t *elen, size_t *slen) { // u16 data+0 - name list length // u8 data+2 - server name type. 0=host_name // u16 data+3 - server name length if (*elen < 5 || (*ext)[2] != 0) return false; *slen = pntoh16(*ext + 3); *ext += 5; *elen -= 5; return *slen <= *elen; } static bool TLSExtractHostFromExt(const uint8_t *ext, size_t elen, char *host, size_t len_host) { // u16 data+0 - name list length // u8 data+2 - server name type. 0=host_name // u16 data+3 - server name length size_t slen; if (!TLSAdvanceToHostInSNI(&ext,&elen,&slen)) return false; if (host && len_host) { if (slen >= len_host) slen = len_host - 1; for (size_t i = 0; i < slen; i++) host[i] = tolower(ext[i]); host[slen] = 0; } return true; } bool TLSHelloExtractHost(const uint8_t *data, size_t len, char *host, size_t len_host, bool bPartialIsOK) { const uint8_t *ext; size_t elen; if (!TLSFindExt(data, len, 0, &ext, &elen, bPartialIsOK)) return false; return TLSExtractHostFromExt(ext, elen, host, len_host); } bool TLSHelloExtractHostFromHandshake(const uint8_t *data, size_t len, char *host, size_t len_host, bool bPartialIsOK) { const uint8_t *ext; size_t elen; if (!TLSFindExtInHandshake(data, len, 0, &ext, &elen, bPartialIsOK)) return false; return TLSExtractHostFromExt(ext, elen, host, len_host); } // find N level domain in SNI static bool TLSHelloFindNLDInSNI(const uint8_t *ext, size_t elen, int level, const uint8_t **p, size_t *len) { size_t slen; return TLSAdvanceToHostInSNI(&ext,&elen,&slen) && FindNLD(ext,slen,level,p,len); } // find the middle of second level domain (SLD) in SNI ext : www.sobaka.ru => aka.ru // return false if SNI ext is bad or SLD is not found static bool TLSHelloFindMiddleOfSLDInSNI(const uint8_t *ext, size_t elen, const uint8_t **p) { size_t len; if (!TLSHelloFindNLDInSNI(ext,elen,2,p,&len)) return false; // in case of one letter SLD (x.com) we split at '.' to prevent appearance of the whole SLD *p = (len==1) ? *p+1 : *p+len/2; return true; } size_t TLSPos(uint8_t posmarker, int16_t pos, const uint8_t *data, size_t sz) { size_t elen; const uint8_t *ext, *p; size_t offset_host,len_host; ssize_t offset; switch(posmarker) { case PM_HOST: case PM_HOST_END: case PM_HOST_SLD: case PM_HOST_MIDSLD: case PM_HOST_ENDSLD: case PM_SNI_EXT: if (TLSFindExt(data,sz,0,&ext,&elen,false)) { if (posmarker==PM_SNI_EXT) { return CheckPos(sz,ext-data+pos); } else { if (!TLSAdvanceToHostInSNI(&ext,&elen,&len_host)) return 0; offset_host = ext-data; return HostPos(posmarker,pos,data,sz,offset_host,len_host); } } return 0; default: return AnyProtoPos(posmarker,pos,data,sz); } }